diff --git a/.travis.yml b/.travis.yml
index 88754db05..4cfbbd451 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -43,6 +43,12 @@ after_success:
# Upload the shaded jar. It's named with git describe, so begins with the last tag name, so begins with the letter v.
# On the other hand, the main un-shaded artifact will begin with the project name 'r5'.
- aws s3 cp --recursive --exclude "*" --include "v*.jar" /home/travis/build/conveyal/r5/target s3://r5-builds
+ # copy Javadoc to S3
+ # The syntax below sets JAVADOC_TARGET to TRAVIS_TAG if defined otherwise TRAVIS_BRANCH
+ # If TRAVIS_TAG is defined, TRAVIS_BRANCH will be undefined per the Travis docs, but Travis will run two
+ # builds: one for the tag and one for the commit to the branch.
+ - JAVADOC_TARGET=${TRAVIS_TAG:-$TRAVIS_BRANCH}
+ - if [[ "$TRAVIS_PULL_REQUEST" = false ]]; then mvn javadoc:javadoc; aws s3 cp --cache-control max-age=300 --recursive /home/travis/build/conveyal/r5/target/site/apidocs s3://javadoc.conveyal.com/r5/${JAVADOC_TARGET}/; fi
sudo: true # sudo: true gets us more memory, which we need, at the expense of slower builds
@@ -55,4 +61,3 @@ cache:
notifications:
slack: conveyal:vqrkN7708qU1OTL9XkbMqQFV
-
diff --git a/LICENSE b/LICENSE
index 65cebebfb..3cb70de14 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2014-2016 Conveyal
+Copyright (c) 2014-2017 Conveyal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 878fda962..5f448190e 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,8 @@ on this particular point [here](http://conveyal.com/blog/2015/05/04/variation-in
Please follow the Conveyal Java style guide at https://github.com/conveyal/JavaStyle/
+Javadoc for the project is built automatically after every change and published at http://javadoc.conveyal.com/r5/master/
+
## History
R5 grew out of several open-source projects. The focus on analytic applications, and the core development team behind R5,
diff --git a/pom.xml b/pom.xml
index 0375c9880..77cdb9aff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
com.conveyal
r5
- 2.2.0-SNAPSHOT
+ 2.5.0-SNAPSHOT
jar
@@ -72,6 +72,7 @@
true
**/*.properties
+ worker.sh
debug-plan/**
fares/**
diff --git a/src/main/java/com/conveyal/r5/R5Main.java b/src/main/java/com/conveyal/r5/R5Main.java
index 0b6af2608..e86454159 100644
--- a/src/main/java/com/conveyal/r5/R5Main.java
+++ b/src/main/java/com/conveyal/r5/R5Main.java
@@ -17,6 +17,13 @@
*/
public class R5Main {
public static void main (String... args) throws Exception {
+ System.out.println("__________________\n" +
+ "___ __ \\__ ____/\n" +
+ "__ /_/ /_____ \\ \n" +
+ "_ _, _/ ____/ / \n" +
+ "/_/ |_| /_____/ \n" +
+ " ");
+
// Pull argument 0 off as the sub-command,
// then pass the remaining args (1..n) on to that subcommand.
String command = args[0];
diff --git a/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java
new file mode 100644
index 000000000..e5b022241
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java
@@ -0,0 +1,85 @@
+package com.conveyal.r5.analyst;
+
+/**
+ * Compute p-values that the two regional analysis results differ due to systematic variation (change in transit network,
+ * percentile of interest, land use, time window, etc.) rather than due to random variation from the Monte Carlo search
+ * process. This uses the confidence interval method of bootstrap hypothesis testing described on page 214 of Efron and
+ * Tibshirani (1993). We perform a two-tailed test, so we are not making any assumptions about whether the change was positive
+ * or negative; a significant value indicates that there was a change, and that the sign is correct.
+ *
+ * This technique works by constructing a confidence interval that has one end at 0 and the other end where it may lie to
+ * make the confidence interval symmetric (i.e. the same amount of density in both tails). We use the percentile method
+ * to calculate this confidence interval; while we are aware there are issues with this, the better bootstrap confidence
+ * interval methods require us to store additional data so we can compute a jackknife estimate of acceleration (Efron
+ * and Tibshirani 1993, ch. 14).
+ *
+ * While it might initially seem that we could use the permutation test described earlier in that chapter, we in fact cannot.
+ * It attractively promises to compare two distributions, which is exactly what we have, but it promises to compare some
+ * statistic of those distributions. We're not interested in the difference in means of the sampling distributions, we're
+ * interested in the difference of the full sampling distribution. To use the permutation test, we would have to apply
+ * it one level up the stack, when computing the regional results, and compute two regional runs simultaneously so we could
+ * permute them when we still had the travel times for all iterations in memory.
+ *
+ * References
+ * Efron, B., & Tibshirani, R. J. (1993). An Introduction to the Bootstrap. Boca Raton, FL: Chapman and Hall/CRC.
+ */
+public class BootstrapPercentileHypothesisTestGridStatisticComputer extends DualGridStatisticComputer {
+ @Override
+ protected double computeValuesForOrigin(int x, int y, int[] aValues, int[] bValues) {
+ // compute the value
+ int nBelowZero = 0;
+ int nZero = 0;
+ int nAboveZero = 0;
+ int nTotal = 0;
+
+ // get the point estimate of the difference
+ int pointEstimate = bValues[0] - aValues[0];
+ if (pointEstimate == 0) return 0; // no difference, not statistically significant
+
+ // subtract every value in b from every value in a
+ // this creates a bootstrapped sampling distribution of the differences, since each bootstrap sample in each analysis
+ // is independent of all others (we've taken a lot of care to ensure this is the case).
+ for (int aIdx = 1; aIdx < aValues.length; aIdx++) {
+ // TODO a and b values are used more than once. This doesn't create bootstrap dependence, correct?
+ int aVal = aValues[aIdx];
+ for (int bIdx = 1; bIdx < bValues.length; bIdx++, nTotal++) {
+ int bVal = bValues[bIdx];
+ int difference = bVal - aVal;
+
+ if (difference > 0) nAboveZero++;
+ else if (difference < 0) nBelowZero++;
+ else nZero++;
+ }
+ }
+
+ double pVal;
+
+ // if the point estimate was less than zero, assume the confidence interval is less than zero
+ // This could be wrong if the accessibility does not lie on the same side of zero as the majority of the density.
+ // TODO Efron and Tibshirani don't really discuss how to handle that case, and in particular don't discuss two-
+ // tailed tests at all.
+ if (pointEstimate < 0) {
+ // compute the density that lies at or above zero. We have changed the technique slightly from what is
+ // described in Efron and Tibshirani 1993 to explicitly handle values that are exactly 0 (important because
+ // we are working with discretized, integer data).
+ double densityAtOrAboveZero = (double) (nZero + nAboveZero) / nTotal;
+ // two tailed test, take this much density off the other end as well, and that's the p-value for this test.
+ pVal = 2 * densityAtOrAboveZero;
+ } else {
+ // compute the density that lies at or below zero
+ double densityAtOrBelowZero = (double) (nBelowZero + nZero) / nTotal;
+ // two tailed test
+ pVal = 2 * densityAtOrBelowZero;
+ }
+
+ // fix up case where point est does not lie on same side of 0 as majority of density.
+ if (pVal > 1) pVal = 1;
+
+ // scale probability to 0 - 100,000
+ // note that the return value can actually be less than 0 if a majority of the density lies on the
+ // opposite side of zero from the point estimate. Maybe we shouldn't be looking at the point estimate
+ // NB for historical reasons, the probability grids show the probability of change, not the probability of no
+ // change (i.e. the p-value). Invert the values (i.e. replace p with alpha)
+ return (1 - pVal) * 1e5;
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java
new file mode 100644
index 000000000..c0ec86232
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java
@@ -0,0 +1,130 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * A DualGridStatisticComputer computes statistics based on two grids.
+ */
+public abstract class DualGridStatisticComputer {
+ private static final AmazonS3 s3 = new AmazonS3Client();
+ /** Version of the access grid format we read */
+ private static final int ACCESS_GRID_VERSION = 0;
+
+ /**
+ * Calculate the probability at each origin that a random individual sample from regional analysis B is larger than one from regional
+ * analysis A. We do this empirically and exhaustively by looping, for each origin, over every possible combination of
+ * samples and taking a difference, then evaluating the number that yielded results greater than zero.
+ *
+ * The regional analysis access grids must be of identical size and zoom level, and a Grid object (the same as is used
+ * for destination grids) will be returned, with probabilities scaled from 0 to 100,000.
+ */
+ public Grid computeImprovementProbability (String resultBucket, String regionalAnalysisAKey, String regionalAnalysisBKey) throws IOException {
+ S3Object aGrid = s3.getObject(resultBucket, regionalAnalysisAKey);
+ S3Object bGrid = s3.getObject(resultBucket, regionalAnalysisBKey);
+ return computeImprovementProbability(aGrid.getObjectContent(), bGrid.getObjectContent());
+ }
+
+ public Grid computeImprovementProbability(InputStream a, InputStream b) throws IOException {
+ LittleEndianDataInputStream aIn = new LittleEndianDataInputStream(new GZIPInputStream(a));
+ LittleEndianDataInputStream bIn = new LittleEndianDataInputStream(new GZIPInputStream(b));
+
+ validateHeaderAndVersion(aIn);
+ validateHeaderAndVersion(bIn);
+
+ int aZoom = aIn.readInt();
+ int aWest = aIn.readInt();
+ int aNorth = aIn.readInt();
+ int aWidth = aIn.readInt();
+ int aHeight = aIn.readInt();
+
+ int bZoom = bIn.readInt();
+ int bWest = bIn.readInt();
+ int bNorth = bIn.readInt();
+ int bWidth = bIn.readInt();
+ int bHeight = bIn.readInt();
+
+ if (aZoom != bZoom ||
+ aWest != bWest ||
+ aNorth != bNorth ||
+ aWidth != bWidth ||
+ aHeight != bHeight) {
+ throw new IllegalArgumentException("Grid sizes for comparison must be identical!");
+ }
+
+ // number of iterations need not be equal, the computed probability is still valid even if they are not
+ // as the probability of choosing any particular sample is still uniform within each scenario.
+ int aIterations = aIn.readInt();
+ int bIterations = bIn.readInt();
+
+ Grid out = new Grid(aZoom, aWidth, aHeight, aNorth, aWest);
+
+ // pixels are in row-major order, iterate over y on outside
+ for (int y = 0; y < aHeight; y++) {
+ for (int x = 0; x < aWidth; x++) {
+ int[] aValues = new int[aIterations];
+ int[] bValues = new int[bIterations];
+
+ for (int iteration = 0, val = 0; iteration < aIterations; iteration++) {
+ aValues[iteration] = (val += aIn.readInt());
+ }
+
+ for (int iteration = 0, val = 0; iteration < bIterations; iteration++) {
+ bValues[iteration] = (val += bIn.readInt());
+ }
+
+ out.grid[x][y] = computeValuesForOrigin(x, y, aValues, bValues);
+ }
+ }
+
+ return out;
+ }
+
+ private static void validateHeaderAndVersion(LittleEndianDataInputStream input) throws IOException {
+ char[] header = new char[8];
+ for (int i = 0; i < 8; i++) {
+ header[i] = (char) input.readByte();
+ }
+
+ if (!"ACCESSGR".equals(new String(header))) {
+ throw new IllegalArgumentException("Input not in access grid format!");
+ }
+
+ int version = input.readInt();
+
+ if (version != ACCESS_GRID_VERSION) {
+ throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
+ }
+ }
+
+ /** Given the origin coordinates and the values from the two grids, compute a value for the output grid */
+ protected abstract double computeValuesForOrigin (int x, int y, int[] aValues, int[] bValues);
+
+ public static void main (String... args) throws IOException {
+ DualGridStatisticComputer comp;
+ if ("--two-tailed".equals(args[0])) {
+ comp = new BootstrapPercentileHypothesisTestGridStatisticComputer();
+ } else {
+ throw new RuntimeException("Unknown grid statistic computer " + args[0]);
+ }
+
+ FileInputStream a = new FileInputStream(args[1]);
+ FileInputStream b = new FileInputStream(args[2]);
+ Grid grid = comp.computeImprovementProbability(a, b);
+
+ if (args[3].endsWith(".grid"))
+ grid.write(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".png"))
+ grid.writePng(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".tif"))
+ grid.writeGeotiff(new FileOutputStream(args[3]));
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java
new file mode 100644
index 000000000..28a174470
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java
@@ -0,0 +1,38 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.conveyal.r5.analyst.scenario.GridStatisticComputer;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Access grids are three-dimensional arrays, with the first two dimensions consisting of x and y coordinates of origins
+ * within the regional analysis, and the third dimension reflects multiple values of the indicator of interest. This could
+ * be instantaneous accessibility results for each Monte Carlo draw when computing average instantaneous accessibility (i.e.
+ * Owen-style accessibility), or it could be multiple bootstrap replications of the sampling distribution of accessibility
+ * given median travel time (see Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing
+ * Accessibility Results: Including Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World
+ * Symposium on Transport and Land Use Research, Brisbane, QLD, Australia, Jul 3-6.)
+ *
+ * An ExtractingGridStatisticComputer simply grabs the value at a particular index within each origin.
+ * When storing bootstrap replications of travel time, we also store the point estimate (using all Monte Carlo draws
+ * equally weighted) as the first value, so an ExtractingGridStatisticComputer(0) can be used to retrieve the point estimate.
+ */
+public class ExtractingGridStatisticComputer extends GridStatisticComputer {
+ public final int index;
+
+ /** Initialize with the index to extract */
+ public ExtractingGridStatisticComputer (int index) {
+ this.index = index;
+ }
+
+ @Override
+ protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) {
+ return valuesThisOrigin[index];
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java
index 4b7c64873..7192aa205 100644
--- a/src/main/java/com/conveyal/r5/analyst/Grid.java
+++ b/src/main/java/com/conveyal/r5/analyst/Grid.java
@@ -122,12 +122,24 @@ public Grid (int zoom, int width, int height, int north, int west) {
this.grid = new double[width][height];
}
+ /**
+ * Version of getPixelWeights which returns the weights as relative to the total area of the input geometry (i.e.
+ * the weight at a pixel is the proportion of the input geometry that falls within that pixel).
+ */
+ public TObjectDoubleMap getPixelWeights (Geometry geometry) {
+ return getPixelWeights(geometry, false);
+ }
+
/**
* Get the proportions of an input polygon feature that overlap each grid cell, in the format [x, y] => weight.
* This weight object can then be fed into the incrementFromPixelWeights function to actually burn a polygon into the
* grid.
+ *
+ * If relativeToPixels is true, the weights are the proportion of the pixel that is covered. Otherwise they are the
+ * portion of this polygon which is within the given grid cell. If using incrementPixelWeights, this should be set to
+ * false.
*/
- public TObjectDoubleMap getPixelWeights (Geometry geometry) {
+ public TObjectDoubleMap getPixelWeights (Geometry geometry, boolean relativeToPixels) {
// No need to convert to a local coordinate system
// Both the supplied polygon and the web mercator pixel geometries are left in WGS84 geographic coordinates.
// Both are distorted equally along the X axis at a given latitude so the proportion of the geometry within
@@ -151,7 +163,8 @@ public TObjectDoubleMap getPixelWeights (Geometry geometry) {
Geometry pixel = getPixelGeometry(x + west, y + north, zoom);
Geometry intersection = pixel.intersection(geometry);
- double weight = intersection.getArea() / area;
+ double denominator = relativeToPixels ? pixel.getArea() : area;
+ double weight = intersection.getArea() / denominator;
weights.put(new int[] { x, y }, weight);
}
}
@@ -345,20 +358,34 @@ public void writeShapefile (String fileName, String fieldName) {
/* functions below from http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Mathematics */
+ /** Return the pixel the given longitude falls within */
public static int lonToPixel (double lon, int zoom) {
return (int) ((lon + 180) / 360 * Math.pow(2, zoom) * 256);
}
+ /** return the west side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
public static double pixelToLon (double pixel, int zoom) {
return pixel / (Math.pow(2, zoom) * 256) * 360 - 180;
}
+ /** Return the longitude of the center of the given pixel */
+ public static double pixelToCenterLon (int pixel, int zoom) {
+ return pixelToLon(pixel + 0.5, zoom);
+ }
+
+ /** Return the pixel the given latitude falls within */
public static int latToPixel (double lat, int zoom) {
double latRad = FastMath.toRadians(lat);
return (int) ((1 - log(tan(latRad) + 1 / cos(latRad)) / Math.PI) * Math.pow(2, zoom - 1) * 256);
}
- // We're using FastMath here, because the built-in math functions were taking a laarge amount of time in profiling.
+ /** Return the latitude of the center of the given pixel */
+ public static double pixelToCenterLat (int pixel, int zoom) {
+ return pixelToLat(pixel + 0.5, zoom);
+ }
+
+ // We're using FastMath here, because the built-in math functions were taking a large amount of time in profiling.
+ /** return the north side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
public static double pixelToLat (double pixel, int zoom) {
return FastMath.toDegrees(atan(sinh(Math.PI - (pixel / 256d) / Math.pow(2, zoom) * 2 * Math.PI)));
}
diff --git a/src/main/java/com/conveyal/r5/analyst/GridBounds.java b/src/main/java/com/conveyal/r5/analyst/GridBounds.java
new file mode 100644
index 000000000..c3d15e202
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/GridBounds.java
@@ -0,0 +1,8 @@
+package com.conveyal.r5.analyst;
+
+/**
+ * We have a lot of classes which have zoom, west, north, width, and height attributes. We would like to be able to pass
+ * them into functions without breaking out the values of these.
+ */
+public class GridBounds {
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/GridComputer.java b/src/main/java/com/conveyal/r5/analyst/GridComputer.java
index e23e1599a..2f8d039f8 100644
--- a/src/main/java/com/conveyal/r5/analyst/GridComputer.java
+++ b/src/main/java/com/conveyal/r5/analyst/GridComputer.java
@@ -6,40 +6,111 @@
import com.amazonaws.services.sqs.model.SendMessageRequest;
import com.conveyal.r5.analyst.cluster.GridRequest;
import com.conveyal.r5.analyst.cluster.Origin;
-import com.conveyal.r5.analyst.cluster.TaskStatistics;
import com.conveyal.r5.api.util.LegMode;
+import com.conveyal.r5.point_to_point.builder.PointToPointQuery;
import com.conveyal.r5.profile.FastRaptorWorker;
-import com.conveyal.r5.profile.Propagater;
+import com.conveyal.r5.profile.PerTargetPropagater;
import com.conveyal.r5.profile.RaptorWorker;
import com.conveyal.r5.profile.StreetMode;
import com.conveyal.r5.streets.LinkedPointSet;
import com.conveyal.r5.streets.StreetRouter;
import com.conveyal.r5.transit.TransportNetwork;
-import com.google.common.io.LittleEndianDataOutputStream;
import gnu.trove.iterator.TIntIntIterator;
+import gnu.trove.list.TIntList;
+import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.TIntIntMap;
+import gnu.trove.map.hash.TIntIntHashMap;
+import org.apache.commons.math3.random.MersenneTwister;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Base64;
-import java.util.stream.Stream;
+import java.util.stream.DoubleStream;
/**
* Computes an accessibility indicator at a single cell in a Web Mercator grid, using destination densities from
- * a Web Mercator density grid. Both grids must be at the same zoom level. The accessibility indicator is calculated
- * separately for each iteration of the range RAPTOR algorithm (each departure minute and randomization of the schedules
- * for the frequency-based routes) and all of these different values of the indicator are retained to allow
- * probabilistic scenario comparison. This does freeze the travel time cutoff and destination grid in the interest of
- * keeping the results down to an acceptable size. The results are placed on an Amazon SQS queue for collation by
- * a GridResultConsumer and a GridResultAssembler.
+ * a Web Mercator density grid. Both grids must be at the same zoom level. This class computes accessibility given median
+ * travel time (described below and in Conway, Byrd and van Eggermond 2017).
*
- * These requests are enqueued by the frontend one for each origin in a regional analysis.
+ * The accessibility is calculated using
+ * median travel time to each destination (there are plans to change this to allow use of arbitrary percentiles in the future). In order
+ * to facilitate probabilistic comparison of scenarios, many accessibility values are returned, representing the
+ * sampling distribution of the accessibility. The first value is the value computed using all the Monte Carlo frequency
+ * draws with equal weight (i.e. it is our best point estimate).
+ *
+ * Subsequent values are produced by bootstrapping the iterations (combinations of a departure minute and a Monte Carlo draw) to simulate
+ * the sampling distribution. For each value returned, we select
+ * _with replacement_ a different set of Monte Carlo draws to use to approximate the effects of repeating the analysis
+ * with completely new Monte Carlo draws. This yields the same number of Monte Carlo draws, but some of the draws from
+ * the original analysis are not present, and some are duplicated. Sounds crazy but it works.
+ *
+ * First, some terminology: a bootstrap replication is the statistic of interest defined on one bootstrap sample,
+ * which is a collection of values sampled with replacement from the original Monte Carlo draws.
+ * For each of the bootstrap samples we compute, we define which Monte Carlo draws should be included. We want
+ * to choose the Monte Carlo draws included in a particular bootstrap sample here, rather than choosing different
+ * Monte Carlo draws at each destination. The most intuitive explanation for this is that this matches the
+ * way our accessibility indicator is defined: each Monte Carlo draw is used at every destination, rather than
+ * computing separate Monte Carlo draws for each destination. Sampling Monte Carlo draws independently at
+ * each destination causes insufficient variance in the resulting sampling
+ * distribution. Extreme values of travel time tend to be correlated across many destinations; a particular Monte
+ * Carlo draw is likely to affect the travel time to all destinations reached by it simultaneously; in the most
+ * extreme case, a change in the Monte Carlo draw on the line serving the origin could make the whole network
+ * reachable, or not, within the travel time threshold.
+ *
+ * This leads us to a more theoretical justification. One of the tenets of the bootstrap is that the
+ * bootstrap samples be independent (Efron and Tibshirani 1993, 46). In situations where the data are not
+ * independent and identically distributed, a number of techniques, e.g. the moving block bootstrap, have been developed
+ * (see Lahiri 2003). Most of these methods
+ * consist of changes to the bootstrap sampling technique to ensure the samples are independent, and the dependence
+ * structure of the data is wrapped up within a sample. While none of the off-the-shelf approaches for dependent
+ * data appear to be helpful for our use case, since we know (or can speculate) the correlation properties of our
+ * data, we can come up with a sampling technique. Since there is a dependence among destinations (many or all
+ * being affected by the same Monte Carlo draw), we use the same bootstrap samples (consisting of Monte Carlo draws)
+ * to compute the travel times to each destination.
+ *
+ * There is also dependence in the departure minutes; Monte Carlo draws from the same departure minute will have
+ * more similar travel times and therefore accessibility values than those from different departure minutes due to the deterministic and constant effect of the
+ * scheduled network. Adjacent departure minutes will additionally have correlated travel times and accessibility values
+ * because transit service is similar (you might have to wait one more minute to catch the same bus).
+ * There are also likely periodic effects at the transit service frequency (e.g. 15 minutes,
+ * 30 minutes, etc.). In testing in Atlanta we did not find ignoring this dependence to be an issue. Patching this up is
+ * fairly simple. Rather than drawing n Monte Carlo draws with replacement for
+ * each bootstrap sample, we draw n / m draws for each departure minute, where n is the number of Monte Carlo draws done in
+ * the original analysis, and m is the number of departure minutes. That is, we ensure that the number of Monte Carlo draws
+ * using a particular departure minute is identical in each bootstrap sample and the original, non-bootstrapped point
+ * estimate. This also means that all the variation in the sampling distributions is due to the effect of frequency-based
+ * lines, rather than due to variation due to departure time (which is held constant).
+ *
+ * We initially did not think that the change to sample from within the departure minutes to be that significant (see
+ * http://rpubs.com/mattwigway/bootstrap-dependence). However, it in fact is. It produces narrower sampling distributions
+ * that result entirely from the Monte Carlo simulation of frequency lines, without mixing in systematic variation that
+ * comes from variation due to different departure times. This also should yield p-values that are more correct; because
+ * there is no systematic influence from scheduled lines, 5% of the values should in fact be found statistically significant
+ * when testing the same scenario (which suggests we should probably be using a higher significance threshold, since we are
+ * doing thousands of computations; see Wasserstein and Lazar 2016, "The ASA's statement on p-values,"
+ * http://dx.doi.org/10.1080/00031305.2016.1154108, and Ioannidis 2005, "Why Most Published Research Findings Are False",
+ * http://dx.doi.org/10.1371/journal.pmed.0020124).
+ *
+ * For more information on the bootstrap and its application to the computation of accessibility given median travel time,
+ * see
+ *
+ * Efron, Bradley, and Robert J Tibshirani. An Introduction to the Bootstrap, Boca Raton, FL: Chapman and Hall/CRC, 1993.
+ * Lahiri, S. N. Resampling Methods for Dependent Data, New York: Springer, 2003.
+ * Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing Accessibility Results: Including
+ * Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World Symposium on Transport and Land
+ * Use Research, Brisbane, Queensland, Australia, Jul 3-6.
+ *
+ * The results are placed on an Amazon SQS queue for collation by a GridResultConsumer and a GridResultAssembler.
*/
public class GridComputer {
private static final Logger LOG = LoggerFactory.getLogger(GridComputer.class);
+ /** The number of bootstrap replications used to bootstrap the sampling distribution of the percentiles */
+ public static final int N_BOOTSTRAP_REPLICATIONS = 1000;
+
/** SQS client. TODO: async? */
private static final AmazonSQS sqs = new AmazonSQSClient();
@@ -65,82 +136,232 @@ public void run() throws IOException {
// ensure they both have the same zoom level
if (request.zoom != grid.zoom) throw new IllegalArgumentException("grid zooms do not match!");
- // NB this will skip all destinations that lie outside query bounds. This is intentional.
- final WebMercatorGridPointSet targets =
- pointSetCache.get(request.zoom, request.west, request.north, request.width, request.height);
-
// use the middle of the grid cell
- request.request.fromLat = Grid.pixelToLat(request.north + request.y + 0.5, request.zoom);
- request.request.fromLon = Grid.pixelToLon(request.west + request.x + 0.5, request.zoom);
+ request.request.fromLat = Grid.pixelToCenterLat(request.north + request.y, request.zoom);
+ request.request.fromLon = Grid.pixelToCenterLon(request.west + request.x, request.zoom);
- // Run the raptor algorithm to get times at each destination for each iteration
+ // Use the extent of the opportunity density grid as the destinations; this avoids having to convert between
+ // coordinate systems, and avoids including thousands of extra points in the weeds where there happens to be a
+ // transit stop. It also means that all data will be included in the analysis even if the user is only analyzing
+ // a small part of the city.
+ WebMercatorGridPointSet destinations = pointSetCache.get(grid);
+ // TODO recast using egress mode
- // first, find the access stops
- StreetMode mode;
- if (request.request.accessModes.contains(LegMode.CAR)) mode = StreetMode.CAR;
- else if (request.request.accessModes.contains(LegMode.BICYCLE)) mode = StreetMode.BICYCLE;
- else mode = StreetMode.WALK;
+ StreetMode accessMode = LegMode.getDominantStreetMode(request.request.accessModes);
+ StreetMode directMode = LegMode.getDominantStreetMode(request.request.directModes);
- LOG.info("Maximum number of rides: {}", request.request.maxRides);
- LOG.info("Maximum trip duration: {}", request.request.maxTripDurationMinutes);
+ if (request.request.transitModes.isEmpty()) {
+ // non-transit search
+ LinkedPointSet linkedDestinations = destinations.link(network.streetLayer, directMode);
- final LinkedPointSet linkedTargets = targets.link(network.streetLayer, mode);
+ StreetRouter sr = new StreetRouter(network.streetLayer);
+ sr.timeLimitSeconds = request.request.maxTripDurationMinutes * 60;
+ sr.streetMode = directMode;
+ sr.profileRequest = request.request;
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS;
+ sr.route();
- StreetRouter sr = new StreetRouter(network.streetLayer);
- sr.distanceLimitMeters = 2000;
- sr.setOrigin(request.request.fromLat, request.request.fromLon);
- sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS;
- sr.route();
+ int offstreetWalkSpeedMillimetersPerSecond = (int) (request.request.getSpeed(directMode) * 1000);
+ int[] travelTimes = linkedDestinations.eval(sr::getTravelTimeToVertex, offstreetWalkSpeedMillimetersPerSecond).travelTimes;
- TIntIntMap reachedStops = sr.getReachedStops();
+ double accessibility = 0;
+ for (int y = 0, index1d = 0; y < grid.height; y++) {
+ for (int x = 0; x < grid.width; x++, index1d++) {
+ if (travelTimes[index1d] < request.cutoffMinutes * 60) {
+ accessibility += grid.grid[x][y];
+ }
+ }
+ }
- // convert millimeters to seconds
- int millimetersPerSecond = (int) (request.request.walkSpeed * 1000);
- for (TIntIntIterator it = reachedStops.iterator(); it.hasNext();) {
- it.advance();
- it.setValue(it.value() / millimetersPerSecond);
- }
+ finish(new int[] { (int) accessibility }); // no uncertainty so no bootstraps
+ } else {
+ // always walk at egress
+ LinkedPointSet linkedDestinationsEgress = destinations.link(network.streetLayer, StreetMode.WALK);
+ // if the access mode is also walk, the link function will use its cache to return the same linkedpointset
+ // NB Should use direct mode but then we'd have to run the street search twice.
+ if (!request.request.directModes.equals(request.request.accessModes)) {
+ LOG.warn("Disparate direct modes and access modes are not supported in analysis mode.");
+ }
+ LinkedPointSet linkedDestinationsAccess = destinations.link(network.streetLayer, accessMode);
- FastRaptorWorker router = new FastRaptorWorker(network.transitLayer, request.request, reachedStops);
-
- // Run the raptor algorithm
- int[][] timesAtStopsEachIteration = router.route();
-
- // Do propagation
- int[] nonTransferTravelTimesToStops = linkedTargets.eval(sr::getTravelTimeToVertex).travelTimes;
- Propagater propagater =
- new Propagater(timesAtStopsEachIteration, nonTransferTravelTimesToStops, linkedTargets, request.request);
-
- // save the instantaneous accessibility at each minute/iteration, later we will use this to compute probabilities
- // of improvement.
- // This means we have the fungibility issue described in AndrewOwenMeanGridStatisticComputer.
- int[] accessibilityPerIteration = propagater.propagate(times -> {
- int access = 0;
- // times in row-major order, convert to grid coordinates
- // TODO use consistent grids for all data in a project
- for (int gridy = 0; gridy < grid.height; gridy++) {
- int reqy = gridy + grid.north - request.north;
- if (reqy < 0 || reqy >= request.height) continue; // outside of project bounds
-
- for (int gridx = 0; gridx < grid.width; gridx++) {
- int reqx = gridx + grid.west - request.west;
- if (reqx < 0 || reqx >= request.width) continue; // outside of project bounds
-
- int index = reqy * request.width + reqx;
- if (times[index] < request.cutoffMinutes * 60) {
- access += grid.grid[gridx][gridy];
- }
+ // Transit search, run the raptor algorithm to get times at each destination for each iteration
+ LOG.info("Maximum number of rides: {}", request.request.maxRides);
+ LOG.info("Maximum trip duration: {}", request.request.maxTripDurationMinutes);
+
+ // first, find the access stops
+ StreetRouter sr = new StreetRouter(network.streetLayer);
+ sr.profileRequest = request.request;
+
+ TIntIntMap accessTimes;
+ int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(accessMode) * 1000);
+ int[] nonTransitTravelTimesToDestinations;
+
+ if (request.request.accessModes.contains(LegMode.CAR_PARK)) {
+ //Currently first search from origin to P+R is hardcoded as time dominance variable for Max car time seconds
+ //Second search from P+R to stops is not actually a search we just return list of all reached stops for each found P+R.
+ // If multiple P+Rs reach the same stop, only one with shortest time is returned. Stops were searched for during graph building phase.
+ // time to stop is time from CAR streetrouter to stop + CAR PARK time + time to walk to stop based on request walk speed
+ //by default 20 CAR PARKS are found it can be changed with sr.maxVertices variable
+ sr = PointToPointQuery.findParkRidePath(request.request, sr, network.transitLayer);
+
+ if (sr == null) {
+ // Origin not found. Return an empty access times map, as is done by the other conditions for other modes.
+ // TODO this is ugly. we should have a way to break out of the search early (here and in other methods).
+ accessTimes = new TIntIntHashMap();
+ } else {
+ accessTimes = sr.getReachedStops();
+ }
+
+ // disallow non-transit access
+ // TODO should we allow non transit access with park and ride?
+ nonTransitTravelTimesToDestinations = new int[linkedDestinationsAccess.size()];
+ Arrays.fill(nonTransitTravelTimesToDestinations, RaptorWorker.UNREACHED);
+ } else if (accessMode == StreetMode.WALK) {
+ // Special handling for walk search: find distances, then divide by walk speed to get times in seconds, to match behavior at egress
+ // (in stop trees). For bike/car searches this is immaterial as the access searches are already asymmetric.
+ sr.streetMode = accessMode;
+ sr.distanceLimitMeters = 2000; // TODO hardwired same as traveltimesurfacecomputer
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS;
+ sr.route();
+
+ // Get the travel times to all stops reached in the initial on-street search. Convert distances to times.
+ // getReachedStops returns distances in this case since dominance variable is millimeters,
+ // so convert to times in the loop below
+ accessTimes = sr.getReachedStops();
+ for (TIntIntIterator it = accessTimes.iterator(); it.hasNext(); ) {
+ it.advance();
+ it.setValue(it.value() / offstreetTravelSpeedMillimetersPerSecond);
}
+ // again, use distance / speed rather than time for symmetry with other searches
+ final StreetRouter effectivelyFinalSr = sr;
+ nonTransitTravelTimesToDestinations =
+ linkedDestinationsAccess.eval(v -> {
+ StreetRouter.State state = effectivelyFinalSr.getStateAtVertex(v);
+ if (state == null) return RaptorWorker.UNREACHED;
+ else return state.distance / offstreetTravelSpeedMillimetersPerSecond;
+ },
+ offstreetTravelSpeedMillimetersPerSecond).travelTimes;
+ } else {
+ // Other modes are already asymmetric with the egress/stop trees, so just do a time-based on street
+ // search and don't worry about distance limiting.
+ sr.streetMode = accessMode;
+ sr.timeLimitSeconds = request.request.getMaxAccessTime(accessMode);
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS;
+ sr.route();
+ accessTimes = sr.getReachedStops(); // already in seconds
+ nonTransitTravelTimesToDestinations =
+ linkedDestinationsAccess.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond)
+ .travelTimes;
}
- return Math.round(access);
- });
+ FastRaptorWorker router = new FastRaptorWorker(network.transitLayer, request.request, accessTimes);
+
+ // Run the raptor algorithm
+ int[][] timesAtStopsEachIteration = router.route();
+
+ // compute bootstrap weights, see comments in Javadoc detailing how we compute the weights we're using
+ // the Mersenne Twister is a fast, high-quality RNG well-suited to Monte Carlo situations
+ MersenneTwister twister = new MersenneTwister();
+
+ // This stores the number of times each Monte Carlo draw is included in each bootstrap sample, which could be
+ // 0, 1 or more. We store the weights on each iteration rather than a list of iterations because it allows
+ // us to easily construct the weights s.t. they sum to the original number of MC draws.
+ int[][] bootstrapWeights = new int[N_BOOTSTRAP_REPLICATIONS + 1][router.nMinutes * router.monteCarloDrawsPerMinute];
+
+ Arrays.fill(bootstrapWeights[0], 1); // equal weight to all observations for first sample
+
+ for (int bootstrap = 1; bootstrap < bootstrapWeights.length; bootstrap++) {
+ for (int minute = 0; minute < router.nMinutes; minute++) {
+ for (int draw = 0; draw < router.monteCarloDrawsPerMinute; draw++) {
+ int iteration = minute * router.monteCarloDrawsPerMinute + twister.nextInt(router.monteCarloDrawsPerMinute);
+ bootstrapWeights[bootstrap][iteration]++;
+ }
+ }
+ }
+
+ // the minimum number of times a destination must be reachable in a single bootstrap sample to be considered
+ // reachable.
+ int minCount = (int) (router.nMinutes * router.monteCarloDrawsPerMinute * (request.travelTimePercentile / 100d));
+
+ // Do propagation of travel times from transit stops to the destinations
+ PerTargetPropagater propagater =
+ new PerTargetPropagater(timesAtStopsEachIteration, nonTransitTravelTimesToDestinations, linkedDestinationsEgress, request.request, request.cutoffMinutes * 60);
+
+ // store the accessibility results for each bootstrap replication
+ double[] bootstrapReplications = new double[N_BOOTSTRAP_REPLICATIONS + 1];
+
+ // The lambda will be called with a boolean array of whether the target is reachable within the cutoff at each
+ // Monte Carlo draw. These are then bootstrapped to create the sampling distribution.
+ propagater.propagate((target, reachable) -> {
+ int gridx = target % grid.width;
+ int gridy = target / grid.width;
+ double opportunityCountAtTarget = grid.grid[gridx][gridy];
+
+ // as an optimization, don't even bother to compute the sampling distribution at cells that contain no
+ // opportunities.
+ if (opportunityCountAtTarget < 1e-6) return;
+
+ // index the Monte Carlo iterations in which the destination was reached within the travel time cutoff,
+ // so we can skip over the non-reachable ones in bootstrap computations.
+ // this improves computation speed (verified)
+ TIntList reachableInIterationsList = new TIntArrayList();
+
+ for (int i = 0; i < reachable.length; i++) {
+ if (reachable[i]) reachableInIterationsList.add(i);
+ }
+
+ int[] reachableInIterations = reachableInIterationsList.toArray();
+
+ boolean isAlwaysReachableWithinTravelTimeCutoff =
+ reachableInIterations.length == timesAtStopsEachIteration.length;
+ boolean isNeverReachableWithinTravelTimeCutoff = reachableInIterations.length == 0;
+
+ // Optimization: only bootstrap if some of the travel times are above the cutoff and some below.
+ // If a destination is always reachable, it will perforce be reachable always in every bootstrap
+ // sample, so there is no need to compute the bootstraps, and similarly if it is never reachable.
+ if (isAlwaysReachableWithinTravelTimeCutoff) {
+ // this destination is always reachable and will be included in all bootstrap samples, no need to do the
+ // bootstrapping
+ // possible optimization: have a variable that persists between calls to this lambda and increment
+ // that single value, then add that value to each replication at the end; reduces the number of additions.
+ for (int i = 0; i < bootstrapReplications.length; i++)
+ bootstrapReplications[i] += grid.grid[gridx][gridy];
+ } else if (isNeverReachableWithinTravelTimeCutoff) {
+ // do nothing, never reachable, does not impact accessibility
+ } else {
+ // This destination is sometimes reachable within the travel time cutoff, do bootstrapping to determine
+ // the distribution of how often
+ for (int bootstrap = 0; bootstrap < N_BOOTSTRAP_REPLICATIONS + 1; bootstrap++) {
+ int count = 0;
+ for (int iteration : reachableInIterations) {
+ count += bootstrapWeights[bootstrap][iteration];
+ }
+
+ // TODO sigmoidal rolloff here, to avoid artifacts from large destinations that jump a few seconds
+ // in or out of the cutoff.
+ if (count > minCount) {
+ bootstrapReplications[bootstrap] += opportunityCountAtTarget;
+ }
+ }
+ }
+ });
+
+ // round (not cast/floor) these all to ints.
+ int[] intReplications = DoubleStream.of(bootstrapReplications).mapToInt(d -> (int) Math.round(d)).toArray();
+ finish(intReplications);
+ }
+ }
+
+ private void finish (int[] samples) throws IOException {
// now construct the output
// these things are tiny, no problem storing in memory
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- new Origin(request, accessibilityPerIteration).write(baos);
+ new Origin(request, samples).write(baos);
// send this origin to an SQS queue as a binary payload; it will be consumed by GridResultConsumer
// and GridResultAssembler
diff --git a/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java
index 492a70565..f2e751250 100644
--- a/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java
+++ b/src/main/java/com/conveyal/r5/analyst/ImprovementProbabilityGridStatisticComputer.java
@@ -14,107 +14,28 @@
* used to create grid B will produce a higher accessibility value than the scenario used to compute grid A. Since this
* uses instantaneous accessibility (based on the accessibility at a single minute and Monte Carlo draw), it is in the same
* family as the AndrewOwenMeanGridStatisticComputer, and has the same fungibility concerns documented there.
+ *
+ * Note that this does not extend GridStatisticComputer as it works with two access grids simultaneously.
*/
-public class ImprovementProbabilityGridStatisticComputer {
- private static final AmazonS3 s3 = new AmazonS3Client();
-
- /** Version of the access grid format we read */
- private static final int ACCESS_GRID_VERSION = 0;
-
- /**
- * Calculate the probability at each origin that a random individual sample from regional analysis B is larger than one from regional
- * analysis A. We do this empirically and exhaustively by for each origin looping over every possible combination of
- * samples and taking a difference, then evaluating the number that yielded results greater than zero.
- *
- * The regional analysis access grids must be of identical size and zoom level, and a Grid object (the same as is used
- * for destination grids) will be returned, with probabilities scaled from 0 to 100,000.
- */
- public static Grid computeImprovementProbability (String resultBucket, String regionalAnalysisAKey, String regionalAnalysisBKey) throws IOException {
- S3Object baseGrid = s3.getObject(resultBucket, regionalAnalysisAKey);
- S3Object scenarioGrid = s3.getObject(resultBucket, regionalAnalysisBKey);
- LittleEndianDataInputStream base = new LittleEndianDataInputStream(new GZIPInputStream(baseGrid.getObjectContent()));
- LittleEndianDataInputStream scenario = new LittleEndianDataInputStream(new GZIPInputStream(scenarioGrid.getObjectContent()));
- validateHeaderAndVersion(base);
- validateHeaderAndVersion(scenario);
-
- int zoom = base.readInt();
- int west = base.readInt();
- int north = base.readInt();
- int width = base.readInt();
- int height = base.readInt();
-
- int scenarioZoom = scenario.readInt();
- int scenarioWest = scenario.readInt();
- int scenarioNorth = scenario.readInt();
- int scenarioWidth = scenario.readInt();
- int scenarioHeight = scenario.readInt();
-
- if (zoom != scenarioZoom ||
- west != scenarioWest ||
- north != scenarioNorth ||
- width != scenarioWidth ||
- height != scenarioHeight) {
- throw new IllegalArgumentException("Grid sizes for comparison must be identical!");
- }
-
- // number of iterations need not be equal, the computed probability is still valid even if they are not
- // as the probability of choosing any particular sample is still uniform within each scenario.
- int baseIterations = base.readInt();
- int scenarioIterations = scenario.readInt();
-
- Grid out = new Grid(zoom, width, height, north, west);
-
- // pixels are in row-major order, iterate over y on outside
- for (int y = 0; y < height; y++) {
- for (int x = 0; x < width; x++) {
- int[] baseValues = new int[baseIterations];
- int[] scenarioValues = new int[scenarioIterations];
- int positive = 0;
-
- for (int iteration = 0, val = 0; iteration < baseIterations; iteration++) {
- baseValues[iteration] = (val += base.readInt());
- }
-
- for (int iteration = 0, val = 0; iteration < scenarioIterations; iteration++) {
- scenarioValues[iteration] = (val += scenario.readInt());
- }
-
- // loop over all scenarion and base values for this cell and determine how many have positive difference
- // this is effectively a convolution, we are calculating the difference (a special case of the sum) for
- // two empirical distributions. At some point in the future we could save the probability distribution of
- // changes rather than the probability of improvement.
- for (int scenarioVal : scenarioValues) {
- for (int baseVal : baseValues) {
- if (scenarioVal > baseVal) positive++;
- }
- }
-
- // Store as probabilities on 0 to 100,000 scale, and store in the grid.
- // We are using the same classes used for opportunity grids here, so they are stored as doubles, and
- // will be cast to ints when saved. This is to avoid rounding error when projecting opportunities into
- // the grids, which we're not doing here, but these grids are small enough the use of doubles instead of ints
- // is relatively harmless.
- out.grid[x][y] = (double) positive / (scenarioIterations * baseIterations) * 100_000;
+public class ImprovementProbabilityGridStatisticComputer extends DualGridStatisticComputer {
+ @Override
+ protected double computeValuesForOrigin(int x, int y, int[] aValues, int[] bValues) {
+ int positive = 0;
+ // loop over all scenario and base values for this cell and determine how many have positive difference
+ // this is effectively a convolution, we are calculating the difference (a special case of the sum) for
+ // two empirical distributions. At some point in the future we could save the probability distribution of
+ // changes rather than the probability of improvement.
+ for (int scenarioVal : bValues) {
+ for (int baseVal : aValues) {
+ if (scenarioVal > baseVal) positive++;
}
}
- return out;
- }
-
- private static void validateHeaderAndVersion(LittleEndianDataInputStream input) throws IOException {
- char[] header = new char[8];
- for (int i = 0; i < 8; i++) {
- header[i] = (char) input.readByte();
- }
-
- if (!"ACCESSGR".equals(new String(header))) {
- throw new IllegalArgumentException("Input not in access grid format!");
- }
-
- int version = input.readInt();
-
- if (version != ACCESS_GRID_VERSION) {
- throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
- }
+ // Store as probabilities on 0 to 100,000 scale, and store in the grid.
+ // We are using the same classes used for opportunity grids here, so they are stored as doubles, and
+ // will be cast to ints when saved. This is to avoid rounding error when projecting opportunities into
+ // the grids, which we're not doing here, but these grids are small enough the use of doubles instead of ints
+ // is relatively harmless.
+ return (double) positive / (aValues.length * bValues.length) * 100_000;
}
}
diff --git a/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java
index 39d4cbba5..fee905e55 100644
--- a/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java
+++ b/src/main/java/com/conveyal/r5/analyst/PercentileGridStatisticComputer.java
@@ -3,6 +3,7 @@
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.S3Object;
+import com.conveyal.r5.analyst.scenario.GridStatisticComputer;
import com.google.common.io.LittleEndianDataInputStream;
import com.google.common.primitives.Ints;
import org.apache.commons.math3.stat.descriptive.rank.Percentile;
@@ -16,70 +17,22 @@
* Take an access grid, which is a grid containing a vector of accessibility at each origin, and collapse to grid of
* scalars containing a percentile of accessibility for each origin.
*/
-public class PercentileGridStatisticComputer {
- private static final AmazonS3 s3 = new AmazonS3Client();
+public class PercentileGridStatisticComputer extends GridStatisticComputer {
+ public final double percentile;
- /** Version of the access grid format we read */
- private static final int ACCESS_GRID_VERSION = 0;
-
- /** Compute a particular percentile of a grid (percentile between 0 and 100) */
- public static Grid computePercentile (String resultsBucket, String key, double percentile) throws IOException {
- percentile /= 100;
-
- if (percentile < 0 || percentile > 1) {
- throw new IllegalArgumentException("Invalid percentile!");
- }
-
- S3Object accessGrid = s3.getObject(resultsBucket, key);
-
- LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(accessGrid.getObjectContent()));
-
- char[] header = new char[8];
- for (int i = 0; i < 8; i++) {
- header[i] = (char) input.readByte();
- }
-
- if (!"ACCESSGR".equals(new String(header))) {
- throw new IllegalArgumentException("Input not in access grid format!");
- }
-
- int version = input.readInt();
-
- if (version != ACCESS_GRID_VERSION) {
- throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
- }
-
- int zoom = input.readInt();
- int west = input.readInt();
- int north = input.readInt();
- int width = input.readInt();
- int height = input.readInt();
- int nIterations = input.readInt();
-
- Grid grid = new Grid(zoom, width, height, north, west);
-
- for (int y = 0; y < height; y++) {
- for (int x = 0; x < width; x++) {
- int[] values = new int[nIterations];
-
- for (int iteration = 0, val = 0; iteration < nIterations; iteration++) {
- values[iteration] = (val += input.readInt());
- }
-
- // compute percentiles
- Arrays.sort(values);
- double index = percentile * (values.length - 1);
- int lowerIndex = (int) Math.floor(index);
- int upperIndex = (int) Math.ceil(index);
- double remainder = index % 1;
- // weight the upper and lower values by where the percentile falls between them (linear interpolation)
- double val = values[upperIndex] * remainder + values[lowerIndex] * (1 - remainder);
- grid.grid[x][y] = val;
- }
- }
-
- input.close();
+ public PercentileGridStatisticComputer (double percentile) {
+ this.percentile = percentile;
+ }
- return grid;
+ @Override
+ protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) {
+ // compute percentiles
+ Arrays.sort(valuesThisOrigin);
+ double index = percentile / 100 * (valuesThisOrigin.length - 1);
+ int lowerIndex = (int) Math.floor(index);
+ int upperIndex = (int) Math.ceil(index);
+ double remainder = index % 1;
+ // weight the upper and lower values by where the percentile falls between them (linear interpolation)
+ return valuesThisOrigin[upperIndex] * remainder + valuesThisOrigin[lowerIndex] * (1 - remainder);
}
}
diff --git a/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java b/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java
new file mode 100644
index 000000000..a3fdd031b
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/TravelTimeSurfaceComputer.java
@@ -0,0 +1,224 @@
+package com.conveyal.r5.analyst;
+
+import com.conveyal.r5.analyst.cluster.AccessGridWriter;
+import com.conveyal.r5.analyst.cluster.TravelTimeSurfaceRequest;
+import com.conveyal.r5.analyst.error.TaskError;
+import com.conveyal.r5.api.util.LegMode;
+import com.conveyal.r5.common.JsonUtilities;
+import com.conveyal.r5.point_to_point.builder.PointToPointQuery;
+import com.conveyal.r5.profile.FastRaptorWorker;
+import com.conveyal.r5.profile.PerTargetPropagater;
+import com.conveyal.r5.profile.RaptorWorker;
+import com.conveyal.r5.profile.StreetMode;
+import com.conveyal.r5.streets.LinkedPointSet;
+import com.conveyal.r5.streets.StreetRouter;
+import com.conveyal.r5.transit.TransportNetwork;
+import gnu.trove.iterator.TIntIntIterator;
+import gnu.trove.map.TIntIntMap;
+import gnu.trove.map.hash.TIntIntHashMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.stream.IntStream;
+
+/**
+ * This computes a travel time surface and returns it in access grid format, with each pixel of the grid containing
+ * different percentiles of travel time requested by the frontend.
+ */
+public class TravelTimeSurfaceComputer {
+ private static final Logger LOG = LoggerFactory.getLogger(TravelTimeSurfaceComputer.class);
+ private static final WebMercatorGridPointSetCache pointSetCache = new WebMercatorGridPointSetCache();
+
+ public final TravelTimeSurfaceRequest request;
+ public final TransportNetwork network;
+
+ public TravelTimeSurfaceComputer (TravelTimeSurfaceRequest request, TransportNetwork network) {
+ this.request = request;
+ this.network = network;
+ }
+
+ public void write (OutputStream os) throws IOException {
+ StreetMode accessMode = LegMode.getDominantStreetMode(request.request.accessModes);
+ StreetMode directMode = LegMode.getDominantStreetMode(request.request.directModes);
+
+ int nPercentiles = request.percentiles.length;
+
+ WebMercatorGridPointSet destinations = pointSetCache.get(request.zoom, request.west, request.north, request.width, request.height);
+
+ AccessGridWriter output;
+ try {
+ output = new AccessGridWriter(request.zoom, request.west, request.north, request.width, request.height, nPercentiles);
+ } catch (IOException e) {
+ // in memory, should not be able to throw this
+ throw new RuntimeException(e);
+ }
+
+ if (request.request.transitModes.isEmpty()) {
+ // non transit search
+ StreetRouter sr = new StreetRouter(network.streetLayer);
+ sr.timeLimitSeconds = request.request.maxTripDurationMinutes * 60;
+ sr.streetMode = directMode;
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS;
+ sr.profileRequest = request.request;
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.route();
+
+ int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(directMode) * 1000);
+
+ LinkedPointSet linkedDestinations = destinations.link(network.streetLayer, directMode);
+
+ int[] travelTimesToTargets = linkedDestinations.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond).travelTimes;
+ for (int target = 0; target < travelTimesToTargets.length; target++) {
+ int x = target % request.width;
+ int y = target / request.width;
+
+ final int travelTimeMinutes =
+ travelTimesToTargets[target] == RaptorWorker.UNREACHED ? RaptorWorker.UNREACHED : travelTimesToTargets[target] / 60;
+ // the frontend expects percentiles of travel time. There is no variation in nontransit travel time so
+ // just replicate the same number repeatedly. This could be improved, but at least it will compress well.
+ // int divide (floor) used below as well. TODO is this wise?
+ int[] results = IntStream.range(0, nPercentiles).map(i -> travelTimeMinutes).toArray();
+ try {
+ output.writePixel(x, y, results);
+ } catch (IOException e) {
+ // can't happen as we're not using a file system backed output
+ throw new RuntimeException(e);
+ }
+ }
+ } else {
+ // we always walk to egress from transit, but we may have a different access mode.
+ // if the access mode is also walk, the pointset's linkage cache will return two references to the same
+ // linkage below
+ // TODO use directMode? Is that a resource limiting issue?
+ // also gridcomputer uses accessMode to avoid running two street searches
+ LinkedPointSet linkedDestinationsAccess = destinations.link(network.streetLayer, accessMode);
+ LinkedPointSet linkedDestinationsEgress = destinations.link(network.streetLayer, StreetMode.WALK);
+
+ if (!request.request.directModes.equals(request.request.accessModes)) {
+ LOG.warn("Disparate direct modes and access modes are not supported in analysis mode.");
+ }
+
+ // Perform street search to find transit stops and non-transit times.
+ StreetRouter sr = new StreetRouter(network.streetLayer);
+ sr.profileRequest = request.request;
+
+ TIntIntMap accessTimes;
+ int offstreetTravelSpeedMillimetersPerSecond = (int) (request.request.getSpeed(accessMode) * 1000);
+ int[] nonTransitTravelTimesToDestinations;
+
+ if (request.request.accessModes.contains(LegMode.CAR_PARK)) {
+ //Currently first search from origin to P+R is hardcoded as time dominance variable for Max car time seconds
+ //Second search from P+R to stops is not actually a search we just return list of all reached stops for each found P+R.
+ // If multiple P+Rs reach the same stop, only one with shortest time is returned. Stops were searched for during graph building phase.
+ // time to stop is time from CAR streetrouter to stop + CAR PARK time + time to walk to stop based on request walk speed
+ //by default 20 CAR PARKS are found it can be changed with sr.maxVertices variable
+ sr = PointToPointQuery.findParkRidePath(request.request, sr, network.transitLayer);
+
+ if (sr == null) {
+ // Origin not found. Return an empty access times map, as is done by the other conditions for other modes.
+ // TODO this is ugly. we should have a way to break out of the search early (here and in other methods).
+ accessTimes = new TIntIntHashMap();
+ } else {
+ accessTimes = sr.getReachedStops();
+ }
+
+ // disallow non-transit access
+ // TODO should we allow non transit access with park and ride?
+ nonTransitTravelTimesToDestinations = new int[linkedDestinationsAccess.size()];
+ Arrays.fill(nonTransitTravelTimesToDestinations, RaptorWorker.UNREACHED);
+ } else if (accessMode == StreetMode.WALK) {
+ // Special handling for walk search: find distances, then divide by walk speed to get times in seconds, to match behavior at egress
+ // (in stop trees). For bike/car searches this is immaterial as the access searches are already asymmetric.
+ sr.streetMode = accessMode;
+ sr.distanceLimitMeters = 2000; // TODO hardwired same as gridcomputer
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DISTANCE_MILLIMETERS;
+ sr.route();
+
+ // Get the travel times to all stops reached in the initial on-street search. Convert distances to speeds.
+ // getReachedStops returns distances in this case since dominance variable is millimeters,
+ // so convert to times in the loop below
+ accessTimes = sr.getReachedStops();
+ for (TIntIntIterator it = accessTimes.iterator(); it.hasNext(); ) {
+ it.advance();
+ it.setValue(it.value() / offstreetTravelSpeedMillimetersPerSecond);
+ }
+
+ // again, use distance / speed rather than time for symmetry with other searches
+ final StreetRouter effectivelyFinalSr = sr;
+ nonTransitTravelTimesToDestinations =
+ linkedDestinationsAccess.eval(v -> {
+ StreetRouter.State state = effectivelyFinalSr.getStateAtVertex(v);
+ if (state == null) return RaptorWorker.UNREACHED;
+ else return state.distance / offstreetTravelSpeedMillimetersPerSecond;
+ },
+ offstreetTravelSpeedMillimetersPerSecond).travelTimes;
+ } else {
+ // Other modes are already asymmetric with the egress/stop trees, so just do a time-based on street
+ // search and don't worry about distance limiting.
+ sr.streetMode = accessMode;
+ sr.timeLimitSeconds = request.request.getMaxAccessTime(accessMode);
+ sr.setOrigin(request.request.fromLat, request.request.fromLon);
+ sr.dominanceVariable = StreetRouter.State.RoutingVariable.DURATION_SECONDS;
+ sr.route();
+ accessTimes = sr.getReachedStops(); // already in seconds
+ nonTransitTravelTimesToDestinations =
+ linkedDestinationsAccess.eval(sr::getTravelTimeToVertex, offstreetTravelSpeedMillimetersPerSecond)
+ .travelTimes;
+ }
+
+ // Create a new Raptor Worker.
+ FastRaptorWorker worker = new FastRaptorWorker(network.transitLayer, request.request, accessTimes);
+
+ // Run the main RAPTOR algorithm to find paths and travel times to all stops in the network.
+ int[][] transitTravelTimesToStops = worker.route();
+
+ PerTargetPropagater perTargetPropagater = new PerTargetPropagater(transitTravelTimesToStops, nonTransitTravelTimesToDestinations, linkedDestinationsEgress, request.request, 120 * 60);
+ perTargetPropagater.propagateTimes((target, times) -> {
+ // sort the times at each target and read off percentiles
+ Arrays.sort(times);
+ int[] results = new int[nPercentiles];
+
+ for (int i = 0; i < nPercentiles; i++) {
+ int offset = (int) Math.round(request.percentiles[i] / 100d * times.length);
+ // Int divide will floor; this is correct because value 0 has travel times of up to one minute, etc.
+ // This means that anything less than a cutoff of (say) 60 minutes (in seconds) will have value 59,
+ // which is what we want. But maybe this is tying the backend and frontend too closely.
+ results[i] = times[offset] == RaptorWorker.UNREACHED ? RaptorWorker.UNREACHED : times[offset] / 60;
+ }
+
+ int x = target % request.width;
+ int y = target / request.width;
+ try {
+ output.writePixel(x, y, results);
+ } catch (IOException e) {
+ // can't happen as we're not using a file system backed output
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ LOG.info("Travel time surface of size {}kb complete", output.getBytes().length / 1000);
+
+ os.write(output.getBytes());
+
+ LOG.info("Travel time surface written, appending metadata with {} warnings", network.scenarioApplicationWarnings.size());
+
+ // Append scenario application warning JSON to result
+ ResultMetadata metadata = new ResultMetadata();
+ metadata.scenarioApplicationWarnings = network.scenarioApplicationWarnings;
+ JsonUtilities.objectMapper.writeValue(os, metadata);
+
+ LOG.info("Done writing");
+
+ os.close();
+ }
+
+ private static class ResultMetadata {
+ public Collection scenarioApplicationWarnings;
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java
index 69143f973..e165c0e6a 100644
--- a/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java
+++ b/src/main/java/com/conveyal/r5/analyst/WebMercatorGridPointSetCache.java
@@ -27,6 +27,10 @@ public WebMercatorGridPointSet get (int zoom, int west, int north, int width, in
return cache.computeIfAbsent(key, GridKey::toPointset);
}
+ public WebMercatorGridPointSet get(Grid grid) {
+ return get(grid.zoom, grid.west, grid.north, grid.width, grid.height);
+ }
+
private static class GridKey {
public int zoom;
public int west;
diff --git a/src/main/java/com/conveyal/r5/analyst/broker/Broker.java b/src/main/java/com/conveyal/r5/analyst/broker/Broker.java
index 2263c7edc..acdc0527d 100644
--- a/src/main/java/com/conveyal/r5/analyst/broker/Broker.java
+++ b/src/main/java/com/conveyal/r5/analyst/broker/Broker.java
@@ -12,6 +12,7 @@
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
+import com.google.common.io.ByteStreams;
import gnu.trove.map.TIntObjectMap;
import gnu.trove.map.TObjectLongMap;
import gnu.trove.map.hash.TIntObjectHashMap;
@@ -24,6 +25,7 @@
import org.slf4j.LoggerFactory;
import java.io.*;
+import java.text.MessageFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
@@ -182,7 +184,7 @@ public Broker (Properties brokerConfig, String addr, int port) {
workerConfig.setProperty("auto-shutdown", "true");
}
- // TODO what are these for?
+ // Tags for the workers
workerName = brokerConfig.getProperty("worker-name") != null ? brokerConfig.getProperty("worker-name") : "analyst-worker";
project = brokerConfig.getProperty("project") != null ? brokerConfig.getProperty("project") : "analyst";
@@ -349,7 +351,7 @@ public void createWorkersInCategory (WorkerCategory category) {
return;
}
- // TODO: compute
+ // TODO: should we start multiple workers on large jobs?
int nWorkers = 1;
// There are no workers on this graph with the right worker commit, start some.
@@ -365,13 +367,10 @@ public void createWorkersInCategory (WorkerCategory category) {
// It's fine to just modify the worker config without a protective copy because this method is synchronized.
workerConfig.setProperty("initial-graph-id", category.graphId);
- workerConfig.setProperty("worker-version", category.workerVersion);
// Tell the worker where to get its R5 JAR. This is a Conveyal S3 bucket with HTTP access turned on.
String workerDownloadUrl = String.format("https://r5-builds.s3.amazonaws.com/%s.jar",
category.workerVersion);
- workerConfig.setProperty("download-url", workerDownloadUrl);
- // This is the R5 broker, so always start R5 workers (rather than OTP workers).
- workerConfig.setProperty("main-class", AnalystWorker.class.getName());
+
ByteArrayOutputStream cfg = new ByteArrayOutputStream();
try {
workerConfig.store(cfg, "Worker config");
@@ -380,9 +379,29 @@ public void createWorkersInCategory (WorkerCategory category) {
throw new RuntimeException(e);
}
- // Send the config to the new workers as EC2 "user data"
- String userData = new String(Base64.getEncoder().encode(cfg.toByteArray()));
- req.setUserData(userData);
+ // Read in the startup script
+ // We used to just pass the config to custom AMI, but by constructing a startup script that initializes a stock
+ // Amazon Linux AMI, we don't have to worry about maintaining and keeping our AMI up to date. Amazon Linux applies
+ // important security updates on startup automatically.
+ try {
+ String workerConfigString = cfg.toString();
+ InputStream scriptIs = Broker.class.getClassLoader().getResourceAsStream("worker.sh");
+ ByteArrayOutputStream scriptBaos = new ByteArrayOutputStream();
+ ByteStreams.copy(scriptIs, scriptBaos);
+ scriptIs.close();
+ scriptBaos.close();
+ String scriptTemplate = scriptBaos.toString();
+
+ String logGroup = workerConfig.getProperty("log-group");
+
+ String script = MessageFormat.format(scriptTemplate, workerDownloadUrl, logGroup, workerConfigString);
+
+ // Send the config to the new workers as EC2 "user data"
+ String userData = new String(Base64.getEncoder().encode(script.getBytes()));
+ req.setUserData(userData);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
if (brokerConfig.getProperty("worker-iam-role") != null)
req.setIamInstanceProfile(new IamInstanceProfileSpecification().withArn(brokerConfig.getProperty("worker-iam-role")));
@@ -398,7 +417,7 @@ public void createWorkersInCategory (WorkerCategory category) {
RunInstancesResult res = ec2.runInstances(req);
res.getReservation().getInstances().forEach(i -> {
Collection tags = Arrays.asList(
- new Tag("name", workerName),
+ new Tag("Name", workerName),
new Tag("project", project)
);
i.setTags(tags);
diff --git a/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java b/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java
index 54a9308b2..5282841fc 100644
--- a/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java
+++ b/src/main/java/com/conveyal/r5/analyst/broker/BrokerHttpHandler.java
@@ -205,6 +205,8 @@ else if (request.getMethod() == Method.POST && "complete".equals(command)) {
// believes to be a reliable work result.
suspendedProducerResponse.setStatus(HttpStatus.OK_200);
suspendedProducerResponse.setContentType(request.getContentType());
+ String contentEncoding = request.getHeader("Content-Encoding");
+ if (contentEncoding != null) suspendedProducerResponse.setHeader("Content-Encoding", contentEncoding);
} else {
// The worker is providing an error message because it spotted something wrong with the request.
suspendedProducerResponse.setStatus(Integer.parseInt(successOrError));
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java b/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java
new file mode 100644
index 000000000..42b6d19aa
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/AccessGridWriter.java
@@ -0,0 +1,151 @@
+package com.conveyal.r5.analyst.cluster;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
+/**
+ * Random-access access grid writer - write arrays into offsets in an access grid format file (3-d array)
+ * Access grids look like this:
+ * Header (ASCII text "ACCESSGR") (note that this header is eight bytes, so the full grid can be mapped into a
+ * Javascript typed array if desired)
+ * Version, 4-byte integer
+ * (4 byte int) Web mercator zoom level
+ * (4 byte int) west (x) edge of the grid, i.e. how many pixels this grid is east of the left edge of the world
+ * (4 byte int) north (y) edge of the grid, i.e. how many pixels this grid is south of the top edge of the world
+ * (4 byte int) width of the grid in pixels
+ * (4 byte int) height of the grid in pixels
+ * (4 byte int) number of values per pixel
+ * (repeated 4-byte int) values of each pixel in row major order. Values within a given pixel are delta coded.
+ */
+public class AccessGridWriter {
+ public static final Logger LOG = LoggerFactory.getLogger(GridResultAssembler.class);
+
+ /** The version of the access grids we produce */
+ public static final int VERSION = 0;
+
+ /** The version of the origins we consume */
+ public static final int ORIGIN_VERSION = 0;
+
+ /** The offset to get to the data section of the access grid file */
+ public static final long HEADER_SIZE = 9 * 4;
+
+ private BufferAbstraction buffer;
+
+ // store as longs so we can use with impunity without fear of overflow
+ private final long zoom, west, north, width, height, nValuesPerPixel;
+
+ /** Create a new access grid writer for a width x height x nValuesPerPixel array. */
+ public AccessGridWriter (int zoom, int west, int north, int width, int height, int nValuesPerPixel) throws IOException {
+ this(null, zoom, west, north, width, height, nValuesPerPixel);
+ }
+
+ /** Create a file-backed AccessGridWriter */
+ public AccessGridWriter (File gridFile, int zoom, int west, int north, int width, int height, int nValuesPerPixel) throws IOException {
+ this.zoom = zoom;
+ this.west = west;
+ this.north = north;
+ this.width = width;
+ this.height = height;
+ this.nValuesPerPixel = nValuesPerPixel;
+
+ long nBytes = (long) width * height * nValuesPerPixel * 4 + HEADER_SIZE;
+ this.buffer = new BufferAbstraction(gridFile, nBytes);
+
+ buffer.writeString(0, "ACCESSGR");
+ buffer.writeInt(8, VERSION);
+ buffer.writeInt(12, zoom);
+ buffer.writeInt(16, west);
+ buffer.writeInt(20, north);
+ buffer.writeInt(24, width);
+ buffer.writeInt(28, height);
+ buffer.writeInt(32, nValuesPerPixel);
+ }
+
+ public void writePixel (int x, int y, int[] pixelValues) throws IOException {
+ if (pixelValues.length != nValuesPerPixel) throw new IllegalArgumentException("Incorrect pixel size!");
+
+ long index1d = (y * width + x) * nValuesPerPixel * 4 + 36;
+
+ int prev = 0;
+ for (int i : pixelValues) {
+ buffer.writeInt(index1d, i - prev);
+ prev = i;
+ index1d += 4;
+ }
+ }
+
+ public void close () throws IOException {
+ this.buffer.close();
+ }
+
+ public byte[] getBytes () {
+ return buffer.getBytes();
+ }
+
+ private static class BufferAbstraction {
+ private RandomAccessFile file;
+ private byte[] buffer;
+
+ public BufferAbstraction (File file, long length) throws IOException {
+ if (file != null) {
+ this.file = new RandomAccessFile(file, "rw");
+ this.file.setLength(length);
+ // clear the contents of the file, don't accidentally leak disk contents
+ for (long i = 0; i < length; i++) {
+ this.file.writeByte(0);
+ }
+ } else {
+ if (length > Integer.MAX_VALUE) throw new IllegalArgumentException("Attempt to create in memory buffer larger than 2GB");
+ this.buffer = new byte[(int) length];
+ }
+ }
+
+ public void writeInt (long offset, int value) throws IOException {
+ byte a = (byte) value;
+ byte b = (byte) (value >> 8);
+ byte c = (byte) (value >> 16);
+ byte d = (byte) (value >> 24);
+
+ if (file != null) {
+ file.seek(offset);
+ // can't use file.writeInt, as it is in typical Java fashion big-endian . . .
+ file.writeByte(a);
+ file.writeByte(b);
+ file.writeByte(c);
+ file.writeByte(d);
+ } else {
+ int offsetInt = (int) offset;
+ buffer[offsetInt] = a;
+ buffer[offsetInt + 1] = b;
+ buffer[offsetInt + 2] = c;
+ buffer[offsetInt + 3] = d;
+ }
+ }
+
+ public void writeString (long offset, String value) throws IOException {
+ if (file != null) {
+ file.seek(offset);
+ file.writeBytes(value);
+ } else {
+ int offsetInt = (int) offset;
+ byte[] bytes = value.getBytes();
+ for (byte byt : bytes) {
+ buffer[offsetInt++] = byt;
+ }
+ }
+ }
+
+ public byte[] getBytes () {
+ if (buffer != null) return buffer;
+ else throw new UnsupportedOperationException("Attempt to retrieve bytes for on-disk access grid.");
+ }
+
+ public void close () throws IOException {
+ if (this.file != null) this.file.close();
+ }
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java
index 8b8db548f..7de3256f3 100644
--- a/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/AnalystWorker.java
@@ -6,6 +6,7 @@
import com.amazonaws.services.s3.AmazonS3Client;
import com.conveyal.r5.analyst.GridCache;
import com.conveyal.r5.analyst.GridComputer;
+import com.conveyal.r5.analyst.TravelTimeSurfaceComputer;
import com.conveyal.r5.analyst.error.ScenarioApplicationException;
import com.conveyal.r5.analyst.error.TaskError;
import com.conveyal.r5.api.util.LegMode;
@@ -248,8 +249,10 @@ public AnalystWorker(Properties config, TransportNetworkCache cache) {
public void run() {
// Create executors with up to one thread per processor.
+ // fix size of highPriorityExecutor to avoid broken pipes when threads are killed off before they are done sending data
+ // (not confirmed if this actually works)
int nP = Runtime.getRuntime().availableProcessors();
- highPriorityExecutor = new ThreadPoolExecutor(1, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(255));
+ highPriorityExecutor = new ThreadPoolExecutor(nP, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(255));
highPriorityExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
batchExecutor = new ThreadPoolExecutor(1, nP, 60, TimeUnit.SECONDS, new ArrayBlockingQueue<>(nP * 2));
batchExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.AbortPolicy());
@@ -266,8 +269,11 @@ public void run() {
// so the worker needs to not come online until it's ready to process requests.
if (networkId != null) {
LOG.info("Pre-loading or building network with ID {}", networkId);
- transportNetworkCache.getNetwork(networkId);
- LOG.info("Done pre-loading network {}", networkId);
+ if (transportNetworkCache.getNetwork(networkId) == null) {
+ LOG.error("Failed to pre-load transport network {}", networkId);
+ } else {
+ LOG.info("Done pre-loading network {}", networkId);
+ }
}
// Start filling the work queues.
@@ -404,6 +410,8 @@ private void handleOneRequest(GenericClusterRequest clusterRequest) {
this.handleStaticMetadataRequest((StaticMetadata.MetadataRequest) clusterRequest, transportNetwork, ts);
} else if (clusterRequest instanceof StaticMetadata.StopTreeRequest) {
this.handleStaticStopTrees((StaticMetadata.StopTreeRequest) clusterRequest, transportNetwork, ts);
+ } else if (clusterRequest instanceof TravelTimeSurfaceRequest) {
+ this.handleTravelTimeSurfaceRequest((TravelTimeSurfaceRequest) clusterRequest, transportNetwork, ts);
} else if (clusterRequest instanceof GridRequest) {
this.handleGridRequest((GridRequest) clusterRequest, transportNetwork, ts);
} else {
@@ -510,6 +518,26 @@ private void handleStaticStopTrees (StaticMetadata.StopTreeRequest request, Tran
}
}
+ /** handle a request for a travel time surface */
+ public void handleTravelTimeSurfaceRequest (TravelTimeSurfaceRequest request, TransportNetwork network, TaskStatistics ts) {
+ TravelTimeSurfaceComputer computer = new TravelTimeSurfaceComputer(request, network);
+ try {
+ PipedInputStream pis = new PipedInputStream();
+
+ // gzip the data before sending it to the broker. Compression ratios here are extreme (100x is not uncommon)
+ // so this will help avoid broken pipes, connection reset by peer, buffer overflows, etc. since these types
+ // of errors are more common on large files.
+ GZIPOutputStream pos = new GZIPOutputStream(new PipedOutputStream(pis));
+
+ finishPriorityTask(request, pis, "application/octet-stream", "gzip");
+
+ computer.write(pos);
+ pos.close();
+ } catch (IOException e) {
+ LOG.error("Error writing travel time surface to broker", e);
+ }
+ }
+
/** Handle a request for access from a Web Mercator grid to a web mercator opportunity density grid (used for regional analysis) */
private void handleGridRequest (GridRequest request, TransportNetwork network, TaskStatistics ts) {
try {
@@ -735,25 +763,32 @@ private void saveBatchTaskResults (AnalystClusterRequest clusterRequest, ResultE
}
}
- /**
- * We have two kinds of output from a worker: we can either write to an object in a bucket on S3, or we can stream
- * output over HTTP to a waiting web service caller. This function handles the latter case. It connects to the
- * cluster broker, signals that the task with a certain ID is being completed, and posts the result back through the
- * broker. The broker then passes the result on to the original requester (usually the analysis web UI).
- *
- * This function will run the HTTP Post operation in a new thread so that this function can return, allowing its
- * caller to write data to the input stream it passed in. This arrangement avoids broken pipes that can happen
- * when the calling thread dies. TODO clarify when and how which thread can die.
- */
public void finishPriorityTask(GenericClusterRequest clusterRequest, InputStream result, String contentType) {
+ finishPriorityTask(clusterRequest, result, contentType, null);
+ }
+
+ /**
+ * We have two kinds of output from a worker: we can either write to an object in a bucket on S3, or we can stream
+ * output over HTTP to a waiting web service caller. This function handles the latter case. It connects to the
+ * cluster broker, signals that the task with a certain ID is being completed, and posts the result back through the
+ * broker. The broker then passes the result on to the original requester (usually the analysis web UI).
+ *
+ * This function will run the HTTP Post operation in a new thread so that this function can return, allowing its
+ * caller to write data to the input stream it passed in. This arrangement avoids broken pipes that can happen
+ * when the calling thread dies. TODO clarify when and how which thread can die.
+ */
+ public void finishPriorityTask(GenericClusterRequest clusterRequest, InputStream result, String contentType, String contentEncoding) {
//CountingInputStream is = new CountingInputStream(result);
+ LOG.info("Returning high-priority results for task {}", clusterRequest.taskId);
+
String url = BROKER_BASE_URL + String.format("/complete/success/%s", clusterRequest.taskId);
HttpPost httpPost = new HttpPost(url);
// TODO reveal any errors etc. that occurred on the worker.
httpPost.setEntity(new InputStreamEntity(result));
httpPost.setHeader("Content-Type", contentType);
+ if (contentEncoding != null) httpPost.setHeader("Content-Encoding", contentEncoding);
taskDeliveryExecutor.execute(() -> {
try {
HttpResponse response = httpClient.execute(httpPost);
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java
index 90dd18644..0dbaf4e46 100644
--- a/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/GenericClusterRequest.java
@@ -21,7 +21,8 @@
@JsonSubTypes.Type(name = "analyst", value = AnalystClusterRequest.class),
@JsonSubTypes.Type(name = "grid", value = GridRequest.class),
@JsonSubTypes.Type(name = "static-metadata", value = StaticMetadata.MetadataRequest.class),
- @JsonSubTypes.Type(name = "static-stop-trees", value = StaticMetadata.StopTreeRequest.class)
+ @JsonSubTypes.Type(name = "static-stop-trees", value = StaticMetadata.StopTreeRequest.class),
+ @JsonSubTypes.Type(name = "travel-time-surface", value = TravelTimeSurfaceRequest.class)
})
public abstract class GenericClusterRequest implements Serializable {
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java
index da2e9e280..d626645a6 100644
--- a/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/GridRequest.java
@@ -1,6 +1,7 @@
package com.conveyal.r5.analyst.cluster;
import com.conveyal.r5.profile.ProfileRequest;
+import com.fasterxml.jackson.annotation.JsonInclude;
import java.util.List;
@@ -29,6 +30,10 @@ public class GridRequest extends GenericClusterRequest {
/** Travel time cutoff (minutes) */
public int cutoffMinutes;
+ /** Percentile of travel time */
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ public int travelTimePercentile = 50;
+
public final String type = "grid";
/** Where should output of this job be saved? */
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java b/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java
index 5bfb0dd63..9744e21df 100644
--- a/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/GridResultAssembler.java
@@ -2,8 +2,6 @@
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
-import com.amazonaws.services.sqs.AmazonSQS;
-import com.amazonaws.services.sqs.AmazonSQSClient;
import com.amazonaws.services.sqs.model.Message;
import com.conveyal.r5.analyst.LittleEndianIntOutputStream;
import com.google.common.io.ByteStreams;
@@ -24,7 +22,6 @@
import java.io.RandomAccessFile;
import java.util.Base64;
import java.util.BitSet;
-import java.util.Locale;
import java.util.zip.GZIPOutputStream;
/**
@@ -134,18 +131,20 @@ public void handleMessage (Message message) {
ByteArrayInputStream bais = new ByteArrayInputStream(body);
Origin origin = Origin.read(bais);
- // if this is not the first request, make sure that we have the correct number of iterations
- if (buffer != null && origin.accessibilityPerIteration.length != this.nIterations) {
- LOG.error("Origin {}, {} has {} iterations, expected {}",
- origin.x, origin.y, origin.accessibilityPerIteration.length, this.nIterations);
+ // if this is not the first request, make sure that we have the correct number of accessibility
+ // samples (either instantaneous accessibility values or bootstrap replications of accessibility given median
+ // travel time, depending on worker version)
+ if (buffer != null && origin.samples.length != this.nIterations) {
+ LOG.error("Origin {}, {} has {} samples, expected {}",
+ origin.x, origin.y, origin.samples.length, this.nIterations);
error = true;
}
// convert to a delta coded byte array
- ByteArrayOutputStream pixelByteOutputStream = new ByteArrayOutputStream(origin.accessibilityPerIteration.length * 4);
+ ByteArrayOutputStream pixelByteOutputStream = new ByteArrayOutputStream(origin.samples.length * 4);
LittleEndianDataOutputStream pixelDataOutputStream = new LittleEndianDataOutputStream(pixelByteOutputStream);
- for (int i = 0, prev = 0; i < origin.accessibilityPerIteration.length; i++) {
- int current = origin.accessibilityPerIteration[i];
+ for (int i = 0, prev = 0; i < origin.samples.length; i++) {
+ int current = origin.samples[i];
pixelDataOutputStream.writeInt(current - prev);
prev = current;
}
@@ -154,7 +153,7 @@ public void handleMessage (Message message) {
// write to the proper subregion of the buffer for this origin
// use a synchronized block to ensure no threading issues
synchronized (this) {
- if (buffer == null) this.initialize(origin.accessibilityPerIteration.length);
+ if (buffer == null) this.initialize(origin.samples.length);
// The origins we receive have 2d coordinates.
// Flatten them to compute file offsets and for the origin checklist.
int index1d = origin.y * request.width + origin.x;
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java b/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java
index 0afb422b2..99bfd69bd 100644
--- a/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/Origin.java
@@ -1,6 +1,5 @@
package com.conveyal.r5.analyst.cluster;
-import com.conveyal.r5.publish.StaticSiteRequest;
import com.google.common.io.LittleEndianDataInputStream;
import com.google.common.io.LittleEndianDataOutputStream;
import org.slf4j.Logger;
@@ -26,14 +25,17 @@ public class Origin {
/** Y coordinate of origin within regional analysis */
public int y;
- /** The instantaneous accessibility for each iteration */
- public int[] accessibilityPerIteration;
+ /**
+ * Samples of accessibility. Depending on worker version, this could be bootstrap replications of accessibility given
+ * median travel time, or with older workers would be instantaneous accessibility for each departure minute.
+ */
+ public int[] samples;
/** Construct an origin given a grid request and the instantaneous accessibility computed for each iteration */
- public Origin (GridRequest request, int[] accessibilityPerIteration) {
+ public Origin (GridRequest request, int[] samples) {
this.x = request.x;
this.y = request.y;
- this.accessibilityPerIteration = accessibilityPerIteration;
+ this.samples = samples;
}
/** allow construction of blank origin for static read method */
@@ -56,9 +58,9 @@ public void write(OutputStream out) throws IOException {
data.writeInt(y);
// write the number of iterations
- data.writeInt(accessibilityPerIteration.length);
+ data.writeInt(samples.length);
- for (int i : accessibilityPerIteration) {
+ for (int i : samples) {
// don't bother to delta code, these are small and we're not gzipping
data.writeInt(i);
}
@@ -91,11 +93,11 @@ public static Origin read (InputStream inputStream) throws IOException {
origin.x = data.readInt();
origin.y = data.readInt();
- origin.accessibilityPerIteration = new int[data.readInt()];
+ origin.samples = new int[data.readInt()];
- for (int iteration = 0; iteration < origin.accessibilityPerIteration.length; iteration++) {
+ for (int iteration = 0; iteration < origin.samples.length; iteration++) {
// de-delta-code the origin
- origin.accessibilityPerIteration[iteration] = data.readInt();
+ origin.samples[iteration] = data.readInt();
}
data.close();
diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java b/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java
new file mode 100644
index 000000000..ddb8ef373
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/cluster/TravelTimeSurfaceRequest.java
@@ -0,0 +1,36 @@
+package com.conveyal.r5.analyst.cluster;
+
+import com.conveyal.r5.profile.ProfileRequest;
+import com.fasterxml.jackson.annotation.JsonInclude;
+
+/**
+ * Request a travel time surface containing travel times to all destinations for several percentiles of travel time.
+ */
+public class TravelTimeSurfaceRequest extends GenericClusterRequest {
+ /** The profile request to use */
+ public ProfileRequest request;
+
+ public int zoom;
+ public int west;
+ public int north;
+ public int width;
+ public int height;
+
+ /** Which percentiles to calculate */
+ public double[] percentiles = new double[] { 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95 };
+
+ /** The grid key on S3 to compute access to */
+ public String grid;
+
+ public final String type = "travel-time-surface";
+
+ @Override
+ public ProfileRequest extractProfileRequest() {
+ return request;
+ }
+
+ @Override
+ public boolean isHighPriority() {
+ return true; // travel time surfaces used in high priority requests only
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java
index 168d0c7bc..c725e87a8 100644
--- a/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java
+++ b/src/main/java/com/conveyal/r5/analyst/scenario/AndrewOwenMeanGridStatisticComputer.java
@@ -26,60 +26,11 @@
* Planning Using Interactive Accessibility Methods on Combined Schedule and Headway-Based Networks,” under review for the
* the 96th Annual Meeting of the Transportation Research Board, January 2017.
*/
-public class AndrewOwenMeanGridStatisticComputer {
- private static final AmazonS3 s3 = new AmazonS3Client();
-
- /** Version of the access grid format we read */
- private static final int ACCESS_GRID_VERSION = 0;
-
- /** Compute a particular percentile of a grid (percentile between 0 and 100) */
- public static Grid computeMean (String resultsBucket, String key) throws IOException {
- S3Object accessGrid = s3.getObject(resultsBucket, key);
-
- LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(accessGrid.getObjectContent()));
-
- char[] header = new char[8];
- for (int i = 0; i < 8; i++) {
- header[i] = (char) input.readByte();
- }
-
- if (!"ACCESSGR".equals(new String(header))) {
- throw new IllegalArgumentException("Input not in access grid format!");
- }
-
- int version = input.readInt();
-
- if (version != ACCESS_GRID_VERSION) {
- throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
- }
-
- int zoom = input.readInt();
- int west = input.readInt();
- int north = input.readInt();
- int width = input.readInt();
- int height = input.readInt();
- int nIterations = input.readInt();
-
- Grid grid = new Grid(zoom, width, height, north, west);
-
- for (int y = 0; y < height; y++) {
- for (int x = 0; x < width; x++) {
-
- int sum = 0;
- int count = 0;
-
- for (int iteration = 0, val = 0; iteration < nIterations; iteration++) {
- sum += (val += input.readInt());
- count++;
- }
-
- // compute percentiles
- grid.grid[x][y] = (double) sum / count;
- }
- }
-
- input.close();
-
- return grid;
+public class AndrewOwenMeanGridStatisticComputer extends GridStatisticComputer {
+ @Override
+ protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) {
+ int sum = 0;
+ for (int i = 0; i < valuesThisOrigin.length; i++) sum += valuesThisOrigin[i];
+ return (double) sum / (double) valuesThisOrigin.length;
}
}
diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java
new file mode 100644
index 000000000..e8aaf45fe
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/analyst/scenario/GridStatisticComputer.java
@@ -0,0 +1,117 @@
+package com.conveyal.r5.analyst.scenario;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.conveyal.r5.analyst.BootstrapPercentileHypothesisTestGridStatisticComputer;
+import com.conveyal.r5.analyst.DualGridStatisticComputer;
+import com.conveyal.r5.analyst.ExtractingGridStatisticComputer;
+import com.conveyal.r5.analyst.Grid;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * This contains the base functions for reading access grids, which are three-dimensional arrays, with the first
+ * two dimensions consisting of x and y coordinates of origins within the regional analysis, and the third
+ * dimension reflecting multiple values of the indicator of interest. This could be instantaneous
+ * accessibility results for each Monte Carlo draw when computing average instantaneous accessibility (i.e.
+ * Owen-style accessibility), or it could be multiple bootstrap replications of the sampling distribution of accessibility
+ * given median travel time (see Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing
+ * Accessibility Results: Including Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World
+ * Symposium on Transport and Land Use Research, Brisbane, QLD, Australia, Jul 3-6.)
+ */
+public abstract class GridStatisticComputer {
+ private static final AmazonS3 s3 = new AmazonS3Client();
+
+ /** Version of the access grid format we read */
+ private static final int ACCESS_GRID_VERSION = 0;
+
+ public Grid compute(String resultsBucket, String key) throws IOException {
+ S3Object accessGrid = s3.getObject(resultsBucket, key);
+
+ return compute(accessGrid.getObjectContent());
+ }
+
+ public Grid compute (InputStream rawInput) throws IOException {
+ LittleEndianDataInputStream input = new LittleEndianDataInputStream(new GZIPInputStream(rawInput));
+
+ char[] header = new char[8];
+ for (int i = 0; i < 8; i++) {
+ header[i] = (char) input.readByte();
+ }
+
+ if (!"ACCESSGR".equals(new String(header))) {
+ throw new IllegalArgumentException("Input not in access grid format!");
+ }
+
+ int version = input.readInt();
+
+ if (version != ACCESS_GRID_VERSION) {
+ throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
+ }
+
+ int zoom = input.readInt();
+ int west = input.readInt();
+ int north = input.readInt();
+ int width = input.readInt();
+ int height = input.readInt();
+
+ // The number of samples stored at each origin; these could be instantaneous accessibility values for each
+ // Monte Carlo draw, or they could be bootstrap replications of a sampling distribution of accessibility given
+ // median travel time.
+ int nSamples = input.readInt();
+
+ Grid outputGrid = new Grid(zoom, width, height, north, west);
+
+ int[] valuesThisOrigin = new int[nSamples];
+
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ // input values are delta-coded per origin, so use val to keep track of current value
+ for (int iteration = 0, val = 0; iteration < nSamples; iteration++) {
+ valuesThisOrigin[iteration] = (val += input.readInt());
+ }
+
+ // reduce the samples at this origin to a single value (mean, percentile, etc., per subclass)
+ outputGrid.grid[x][y] = computeValueForOrigin(x, y, valuesThisOrigin);
+ }
+ }
+
+ input.close();
+
+ return outputGrid;
+ }
+
+ /** Subclasses should implement this method to compute the single value at a particular grid cell given the samples at that origin. */
+ protected abstract double computeValueForOrigin (int x, int y, int[] valuesThisOrigin);
+
+ public static void main (String... args) throws IOException {
+ GridStatisticComputer comp;
+ InputStream in;
+ if ("--two-tailed".equals(args[0])) {
+ DualGridStatisticComputer.main(args);
+ return;
+ } else if ("--extract".equals(args[0])) {
+ int which = Integer.parseInt(args[1]);
+ in = new FileInputStream(args[2]);
+ comp = new ExtractingGridStatisticComputer(which);
+ } else {
+ throw new RuntimeException("Unknown grid statistic computer " + args[0]);
+ }
+
+ Grid grid = comp.compute(in);
+
+ if (args[3].endsWith(".grid"))
+ grid.write(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".png"))
+ grid.writePng(new FileOutputStream(args[3]));
+ else if (args[3].endsWith(".tif"))
+ grid.writeGeotiff(new FileOutputStream(args[3]));
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/api/ProfileResponse.java b/src/main/java/com/conveyal/r5/api/ProfileResponse.java
index 71d73913a..8dff49542 100644
--- a/src/main/java/com/conveyal/r5/api/ProfileResponse.java
+++ b/src/main/java/com/conveyal/r5/api/ProfileResponse.java
@@ -210,4 +210,9 @@ public void generateStreetTransfers(TransportNetwork transportNetwork, ProfileRe
}
request.reverseSearch = prevReverseSearch;
}
+
+ /** Recompute stats for all options, should be done once all options have been added */
+ public void recomputeStats (ProfileRequest request) {
+ this.options.forEach(o -> o.recomputeStats(request));
+ }
}
diff --git a/src/main/java/com/conveyal/r5/api/util/LegMode.java b/src/main/java/com/conveyal/r5/api/util/LegMode.java
index aa70d39ed..75cb533d5 100644
--- a/src/main/java/com/conveyal/r5/api/util/LegMode.java
+++ b/src/main/java/com/conveyal/r5/api/util/LegMode.java
@@ -1,5 +1,9 @@
package com.conveyal.r5.api.util;
+import com.conveyal.r5.profile.StreetMode;
+
+import java.util.Set;
+
/**
* Modes of transport on ingress egress legs
*/
@@ -9,5 +13,12 @@ public enum LegMode {
//Renting a bicycle
BICYCLE_RENT,
//Park & Ride
- CAR_PARK
+ CAR_PARK;
+
+ /** Return the heaviest/fastest StreetMode for use in stop finding */
+ public static StreetMode getDominantStreetMode(Set modes) {
+ if (modes.contains(LegMode.CAR)) return StreetMode.CAR;
+ else if (modes.contains(LegMode.BICYCLE)) return StreetMode.BICYCLE;
+ else return StreetMode.WALK;
+ }
}
diff --git a/src/main/java/com/conveyal/r5/api/util/ProfileOption.java b/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
index 954357647..076c52767 100644
--- a/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
+++ b/src/main/java/com/conveyal/r5/api/util/ProfileOption.java
@@ -1,6 +1,9 @@
package com.conveyal.r5.api.util;
+import com.conveyal.r5.profile.Path;
import com.conveyal.r5.profile.PathWithTimes;
+import com.conveyal.r5.profile.ProfileRequest;
+import com.conveyal.r5.profile.StatsCalculator;
import com.conveyal.r5.transit.TransitLayer;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
@@ -8,6 +11,8 @@
import org.slf4j.LoggerFactory;
import java.time.*;
+import java.time.temporal.TemporalAccessor;
+import java.time.temporal.TemporalField;
import java.util.*;
import java.util.stream.Collectors;
@@ -34,6 +39,18 @@ public class ProfileOption {
private transient Map accessIndexes = new HashMap<>();
private transient Map egressIndexes = new HashMap<>();
+ /**
+ * contains full itineraries needed to compute statistics. We don't present these to the client though.
+ *
+ * There is no way to compute the statistics by combining the statistics of constituent paths. Suppose there
+ * are two patterns each running every 15 minutes; we don't know if they are coming in and out of phase.
+ * Ergo, we save all itineraries so we can compute fresh stats when we're done.
+ *
+ * Since all transit paths here will have the same start and end stops, we can save all itineraries individually,
+ * we don't need to save their start and end stops as well.
+ */
+ private transient List fullItineraries = new ArrayList<>();
+
@Override public String toString() {
return "ProfileOption{" +
" transit=\n " + transit +
@@ -154,6 +171,7 @@ public void addTransit(TransitLayer transitLayer, PathWithTimes currentTransitPa
}
}
+ fullItineraries.addAll(currentTransitPath.itineraries);
}
/**
@@ -281,6 +299,29 @@ public void addMiddle(StreetSegment streetSegment, Transfer transfer) {
}
}
+ /** recompute wait and ride stats for all transit itineraries, should be done once all transit paths are added */
+ public void recomputeStats (ProfileRequest req) {
+ if (!fullItineraries.isEmpty()) {
+ StatsCalculator.StatsCollection collection =
+ StatsCalculator.computeStatistics(
+ req,
+ // TODO is this intended to handle multiple access modes to the same stop?
+ // do these ever have more or less than one value?
+ access.get(itinerary.get(0).connection.access).duration,
+ egress.get(itinerary.get(0).connection.egress).duration,
+ transit.size(),
+ fullItineraries);
+
+ this.stats = collection.stats;
+
+ for (int leg = 0; leg < transit.size(); leg++) {
+ TransitSegment segment = transit.get(leg);
+ segment.rideStats = collection.rideStats[leg];
+ segment.waitStats = collection.waitStats[leg];
+ }
+ }
+ }
+
public List getFares() {
return fares == null ? null : fares.stream().collect(Collectors.toList());
}
diff --git a/src/main/java/com/conveyal/r5/common/JsonUtilities.java b/src/main/java/com/conveyal/r5/common/JsonUtilities.java
index b91b51c28..db5788792 100644
--- a/src/main/java/com/conveyal/r5/common/JsonUtilities.java
+++ b/src/main/java/com/conveyal/r5/common/JsonUtilities.java
@@ -21,13 +21,19 @@
*/
public abstract class JsonUtilities {
public static final ObjectMapper objectMapper = createBaseObjectMapper();
+ public static final ObjectMapper lenientObjectMapper = createBaseObjectMapper();
static {
// If we receive a JSON object containing a field that we don't recognize, fail. This should catch misspellings.
+ // This is used on the broker which should always use the latest R5 so that fields aren't silently dropped because
+ // the broker does not support them.
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
- }
- public static final ObjectMapper lenientObjectMapper = createBaseObjectMapper();
+ // On the worker, we want to use an objectmapper that will ignore unknown properties, so that it doesn't crash
+ // when an older worker is connected to a newer broker.
+ // TODO figure out how to warn the user when a user uses features not supported by their worker
+ lenientObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+ }
private static ObjectMapper createBaseObjectMapper () {
ObjectMapper objectMapper = new ObjectMapper();
diff --git a/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java b/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
index b19d23e66..8101caf48 100644
--- a/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
+++ b/src/main/java/com/conveyal/r5/labeling/LevelOfTrafficStressLabeler.java
@@ -12,6 +12,8 @@
import org.slf4j.LoggerFactory;
import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -28,6 +30,10 @@ public class LevelOfTrafficStressLabeler {
/** match OSM speeds, from http://wiki.openstreetmap.org/wiki/Key:maxspeed */
private static final Pattern speedPattern = Pattern.compile("^([0-9][\\.0-9]*?) ?(km/h|kmh|kph|mph|knots)?$");
+ Set badMaxspeedValues = new HashSet<>();
+
+ Set badLaneValues = new HashSet<>();
+
/** Set the LTS for this way in the provided flags (not taking into account any intersection LTS at the moment) */
public void label (Way way, EnumSet forwardFlags, EnumSet backFlags) {
// the general idea behind this function is that we progress from low-stress to higher-stress, bailing out as we go.
@@ -76,18 +82,23 @@ public void label (Way way, EnumSet forwardFlags, EnumSet 0) LOG.warn("Unrecognized values for maxspeed tag: {}", badMaxspeedValues);
+ if (badLaneValues.size() > 0) LOG.warn("Unrecognized values for lane tag: {}", badLaneValues);
+ }
}
diff --git a/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java b/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
index bc2d0bd27..dbf93b757 100644
--- a/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
+++ b/src/main/java/com/conveyal/r5/labeling/TraversalPermissionLabeler.java
@@ -283,7 +283,7 @@ private void applyOnewayHierarchically(Node node, OneWay oneWay, EnumMap getTreeForWay (Way way) {
String highway = way.getTag("highway");
EnumMap tree = getDefaultTree();
@@ -431,17 +431,24 @@ private EnumMap getDirectionalTree (Way way) {
applyOnewayHierarchically(Node.BICYCLE, sidepath, tree);
}
- // there are in fact one way pedestrian paths, believe it or not, but usually pedestrians don't inherit any oneway
- // restrictions from more general modes
+ // Usually pedestrians don't inherit any oneway restrictions from other modes.
+ // However there are some one way pedestrian paths, e.g. entrances with turnstiles.
+ // Most such tags are oneway:foot=no though.
applyOnewayHierarchically(Node.FOOT, OneWay.NO, tree);
- if (way.hasTag("oneway:foot")) applyOnewayHierarchically(Node.FOOT, OneWay.fromTag(way.getTag("oneway")), tree);
+ if (way.hasTag("oneway:foot")) {
+ applyOnewayHierarchically(Node.FOOT, OneWay.fromTag(way.getTag("oneway:foot")), tree);
+ }
return tree;
}
- /** We are using such a small subset of the tree that we can just code it up manually here */
+ /**
+ * We apply access permissions recursively to sub-trees of modes.
+ * https://wiki.openstreetmap.org/wiki/Computing_access_restrictions#Transport_mode_hierarchy
+ * We are using such a small subset of the tree on the wiki that we code up the parent/child relationships manually.
+ */
protected enum Node {
ACCESS, // root
FOOT, VEHICLE, // level 1
diff --git a/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java b/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java
index d7771501c..dc6595b89 100644
--- a/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java
+++ b/src/main/java/com/conveyal/r5/point_to_point/builder/PointToPointQuery.java
@@ -10,6 +10,7 @@
import com.conveyal.r5.streets.StreetRouter;
import com.conveyal.r5.streets.VertexStore;
import com.conveyal.r5.transit.RouteInfo;
+import com.conveyal.r5.transit.TransitLayer;
import com.conveyal.r5.transit.TransportNetwork;
import com.conveyal.r5.transit.TripPattern;
import gnu.trove.iterator.TIntIntIterator;
@@ -185,6 +186,8 @@ public ProfileResponse getPlan(ProfileRequest request) {
profileResponse.generateStreetTransfers(transportNetwork, request);
}
+ profileResponse.recomputeStats(request);
+
LOG.info("Returned {} options", profileResponse.getOptions().size());
LOG.info("Took {} ms", System.currentTimeMillis() - startRouting);
@@ -295,7 +298,7 @@ private HashMap findAccessPaths(ProfileRequest request) {
StreetRouter streetRouter = new StreetRouter(transportNetwork.streetLayer);
streetRouter.profileRequest = request;
if (mode == LegMode.CAR_PARK) {
- streetRouter = findParkRidePath(request, streetRouter);
+ streetRouter = findParkRidePath(request, streetRouter, transportNetwork.transitLayer);
if (streetRouter != null) {
accessRouter.put(LegMode.CAR_PARK, streetRouter);
} else {
@@ -346,7 +349,7 @@ private HashMap findAccessPaths(ProfileRequest request) {
* @param streetRouter where profileRequest was already set
* @return null if path isn't found
*/
- private StreetRouter findParkRidePath(ProfileRequest request, StreetRouter streetRouter) {
+ public static StreetRouter findParkRidePath(ProfileRequest request, StreetRouter streetRouter, TransitLayer transitLayer) {
streetRouter.streetMode = StreetMode.CAR;
streetRouter.timeLimitSeconds = request.maxCarTime * 60;
streetRouter.flagSearch = VertexStore.VertexFlag.PARK_AND_RIDE;
@@ -357,7 +360,7 @@ private StreetRouter findParkRidePath(ProfileRequest request, StreetRouter stree
LOG.info("CAR PARK: Found {} car parks", carParks.size());
ParkRideRouter parkRideRouter = new ParkRideRouter(streetRouter.streetLayer);
parkRideRouter.profileRequest = request;
- parkRideRouter.addParks(carParks, transportNetwork.transitLayer);
+ parkRideRouter.addParks(carParks, transitLayer);
parkRideRouter.previousRouter = streetRouter;
return parkRideRouter;
/*StreetRouter walking = new StreetRouter(transportNetwork.streetLayer);
diff --git a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java
index ca61a9dfc..b03ffc869 100644
--- a/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java
+++ b/src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java
@@ -40,6 +40,8 @@ public class FastRaptorWorker {
/** Minimum wait for boarding to account for schedule variation */
private static final int MINIMUM_BOARD_WAIT_SEC = 60;
+ public final int nMinutes;
+ public final int monteCarloDrawsPerMinute;
// Variables to track time spent, all in nanoseconds (some of the operations we're timing are significantly submillisecond)
// (although I suppose using ms would be fine because the number of times we cross a millisecond boundary would be proportional
@@ -107,6 +109,12 @@ public FastRaptorWorker (TransitLayer transitLayer, ProfileRequest request, TInt
for (int i = 1; i < this.scheduleState.length; i++) this.scheduleState[i].previous = this.scheduleState[i - 1];
offsets = new FrequencyRandomOffsets(transitLayer);
+
+ // compute number of minutes for scheduled search
+ nMinutes = (request.toTime - request.fromTime) / DEPARTURE_STEP_SEC;
+
+ // how many monte carlo draws per minute of scheduled search to get desired total iterations?
+ monteCarloDrawsPerMinute = (int) Math.ceil((double) request.monteCarloDraws / nMinutes);
}
/** For each iteration, return the travel time to each transit stop */
@@ -117,16 +125,10 @@ public int[][] route () {
prefilterPatterns();
- // compute number of minutes for scheduled search
- int nMinutes = (request.toTime - request.fromTime) / DEPARTURE_STEP_SEC;
-
- // how many monte carlo draws per minute of scheduled search to get desired total iterations?
- int monteCarloDrawsPerMinute = (int) Math.ceil((double) request.monteCarloDraws / nMinutes);
-
LOG.info("Performing {} scheduled iterations each with {} Monte Carlo draws for a total of {} iterations",
nMinutes, monteCarloDrawsPerMinute, nMinutes * monteCarloDrawsPerMinute);
- int[][] results = new int[transit.hasFrequencies ? nMinutes * monteCarloDrawsPerMinute : nMinutes][];
+ int[][] results = new int[nMinutes * monteCarloDrawsPerMinute][];
int currentIteration = 0;
// main loop over departure times
@@ -314,9 +316,16 @@ private int[][] runRaptorForMinute (int departureTime, int iterationsPerMinute)
return result;
} else {
- // no frequencies, return result of scheduled search
- if (saveAllStates) statesEachIteration.add(scheduleState[request.maxRides].deepCopy());
- return new int[][] { scheduleState[request.maxRides].bestNonTransferTimes };
+ // No frequencies, return result of scheduled search, but multiplied by the number of
+ // MC draws so that the scheduled search accessibility avoids potential bugs where assumptions
+ // are made about how many results will be returned from a search, e.g., in
+ // https://github.com/conveyal/r5/issues/306
+ int[][] result = new int[iterationsPerMinute][];
+ for (int i = 0; i < monteCarloDrawsPerMinute; i++) {
+ if (saveAllStates) statesEachIteration.add(scheduleState[request.maxRides].deepCopy());
+ result[i] = scheduleState[request.maxRides].bestNonTransferTimes;
+ }
+ return result;
}
}
@@ -408,7 +417,11 @@ private void doScheduledSearchForRound(RaptorState inputState, RaptorState outpu
}
}
- private void doFrequencySearchForRound(RaptorState inputState, RaptorState outputState, boolean bound) {
+ /** Do a frequency search. If computeDeterministicUpperBound is true, worst-case frequency boarding time will be used
+ * so that the output of this function can be used in a range-RAPTOR search. Otherwise Monte Carlo schedules will be
+ * used to improve upon the output of the range-RAPTOR bounds search.
+ */
+ private void doFrequencySearchForRound(RaptorState inputState, RaptorState outputState, boolean computeDeterministicUpperBound) {
BitSet patternsTouched = getPatternsTouchedForStops(inputState, frequencyIndexForOriginalPatternIndex);
for (int patternIndex = patternsTouched.nextSetBit(0); patternIndex >= 0; patternIndex = patternsTouched.nextSetBit(patternIndex + 1)) {
@@ -445,9 +458,12 @@ private void doFrequencySearchForRound(RaptorState inputState, RaptorState outpu
if (inputState.bestStopsTouched.get(stop)) {
int earliestBoardTime = inputState.bestTimes[stop] + MINIMUM_BOARD_WAIT_SEC;
- int newBoardingDepartureTimeAtStop = bound ?
- getRandomFrequencyDepartureTime(schedule, stopPositionInPattern, offset, frequencyEntryIdx, earliestBoardTime) :
- getWorstCaseFrequencyDepartureTime(schedule, stopPositionInPattern, frequencyEntryIdx, earliestBoardTime);
+ // if we're computing the upper bound, we want the worst case. This is the only thing that is
+ // valid in a range RAPTOR search; using random schedule draws in range RAPTOR would be problematic
+ // because they need to be independent across minutes.
+ int newBoardingDepartureTimeAtStop = computeDeterministicUpperBound ?
+ getWorstCaseFrequencyDepartureTime(schedule, stopPositionInPattern, frequencyEntryIdx, earliestBoardTime) :
+ getRandomFrequencyDepartureTime(schedule, stopPositionInPattern, offset, frequencyEntryIdx, earliestBoardTime);
int remainOnBoardDepartureTimeAtStop = Integer.MAX_VALUE;
diff --git a/src/main/java/com/conveyal/r5/profile/PathWithTimes.java b/src/main/java/com/conveyal/r5/profile/PathWithTimes.java
index 648a8681d..66e86eb26 100644
--- a/src/main/java/com/conveyal/r5/profile/PathWithTimes.java
+++ b/src/main/java/com/conveyal/r5/profile/PathWithTimes.java
@@ -21,7 +21,6 @@ public class PathWithTimes extends Path {
/** Wait stats for each leg */
public Stats[] waitStats;
-
/** Ride stats for each leg */
public Stats[] rideStats;
@@ -175,69 +174,12 @@ private void sortAndFilterItineraries () {
* we just compute how long the path takes at every possible departure minute. There's probably an
* elegant theoretical way to do this, but I prefer pragmatism over theory.
*/
- private void computeStatistics (ProfileRequest req, int accessTime, int egressTime) {
- this.stats = new Stats();
- this.rideStats = IntStream.range(0, this.patterns.length).mapToObj(i -> new Stats()).toArray(Stats[]::new);
- this.waitStats = IntStream.range(0, this.patterns.length).mapToObj(i -> new Stats()).toArray(Stats[]::new);
-
- for (int start = req.fromTime; start < req.toTime; start += 60) {
- // TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
- int timeAtOriginStop = start + accessTime + RaptorWorker.BOARD_SLACK_SECONDS;
- int bestTimeAtDestinationStop = Integer.MAX_VALUE;
-
- Itinerary bestItinerary = null;
- for (Itinerary itin : this.itineraries) {
- // itinerary cannot be used at this time
- if (itin.boardTimes[0] < timeAtOriginStop) continue;
-
- if (itin.alightTimes[this.length - 1] < bestTimeAtDestinationStop) {
- bestTimeAtDestinationStop = itin.alightTimes[this.length - 1];
- bestItinerary = itin;
- }
- }
-
- if (bestItinerary == null) continue; // cannot use this trip at this time
-
- int bestTimeAtDestination = bestTimeAtDestinationStop + egressTime;
-
- int travelTime = bestTimeAtDestination - start;
-
- stats.num++;
- stats.avg += travelTime;
- stats.min = Math.min(stats.min, travelTime);
- stats.max = Math.max(stats.max, travelTime);
-
- // accumulate stats for each leg
- for (int leg = 0; leg < this.patterns.length; leg++) {
- Stats ride = rideStats[leg];
- int rideLen = bestItinerary.alightTimes[leg] - bestItinerary.boardTimes[leg];
- ride.num++;
- ride.avg += rideLen;
- ride.min = Math.min(ride.min, rideLen);
- ride.max = Math.max(ride.max, rideLen);
-
- Stats wait = waitStats[leg];
-
- int arriveAtStopTime = leg == 0 ? timeAtOriginStop : bestItinerary.arriveAtBoardStopTimes[leg];
-
- int waitTime = bestItinerary.boardTimes[leg] - arriveAtStopTime;
-
- wait.num++;
- wait.avg += waitTime;
- wait.min = Math.min(waitTime, wait.min);
- wait.max = Math.max(waitTime, wait.max);
- }
- }
-
- if (stats.num == 0) throw new IllegalStateException("No valid itineraries found for path computed in RaptorWorker");
-
- stats.avg /= stats.num;
-
- for (Stats[] statSet : new Stats[][] { rideStats, waitStats }) {
- for (Stats stats : statSet) {
- stats.avg /= stats.num;
- }
- }
+ public void computeStatistics (ProfileRequest req, int accessTime, int egressTime) {
+ StatsCalculator.StatsCollection coll =
+ StatsCalculator.computeStatistics(req, accessTime, egressTime, length, itineraries);
+ this.stats = coll.stats;
+ this.waitStats = coll.waitStats;
+ this.rideStats = coll.rideStats;
}
/** itineraries for this path, which can be sorted by departure time */
diff --git a/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java b/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java
new file mode 100644
index 000000000..e6e946bcf
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/profile/PerTargetPropagater.java
@@ -0,0 +1,159 @@
+package com.conveyal.r5.profile;
+
+import com.conveyal.r5.streets.LinkedPointSet;
+import gnu.trove.map.TIntIntMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+
+/**
+ * This class propagates from times at transit stops to times at destinations (targets). It is called with a function
+ * that will be called with the target index (which is the row-major 1D index of the destination that is being
+ * propagated to) and the array of whether the target was reached within the travel time cutoff in each Monte Carlo
+ * draw. This is used in GridComputer to perform bootstrapping of accessibility given median travel time. This function
+ * is only called for targets that were ever reached.
+ *
+ * It may seem needlessly generic to use a lambda function, but it allows us to confine the bootstrapping code to GridComputer.
+ * Perhaps this should be refactored to be a BootstrappingPropagater that just returns bootstrapped accessibility values.
+ */
+public class PerTargetPropagater {
+ private static final Logger LOG = LoggerFactory.getLogger(PerTargetPropagater.class);
+
+ /** Times at transit stops for each iteration */
+ public final int[][] travelTimesToStopsEachIteration;
+
+ /** Times at targets using the street network */
+ public final int[] nonTransitTravelTimesToTargets;
+
+ /** The travel time cutoff in this regional analysis */
+ public final int cutoffSeconds;
+
+ /** The linked targets */
+ public final LinkedPointSet targets;
+
+ /** the profilerequest (used for walk speed etc.) */
+ public final ProfileRequest request;
+
+ public PerTargetPropagater (int[][] travelTimesToStopsEachIteration, int[] nonTransitTravelTimesToTargets, LinkedPointSet targets, ProfileRequest request, int cutoffSeconds) {
+ this.travelTimesToStopsEachIteration = travelTimesToStopsEachIteration;
+ this.nonTransitTravelTimesToTargets = nonTransitTravelTimesToTargets;
+ this.targets = targets;
+ this.request = request;
+ this.cutoffSeconds = cutoffSeconds;
+ }
+
+ private void propagate (Reducer reducer, TravelTimeReducer travelTimeReducer) {
+ boolean saveTravelTimes = travelTimeReducer != null;
+ targets.makePointToStopDistanceTablesIfNeeded();
+
+ long startTimeMillis = System.currentTimeMillis();
+ // avoid float math in loop below
+ // float math was previously observed to slow down this loop, however it's debatable whether that was due to
+ // casts, the operations themselves, or the fact that the operations were being completed with doubles rather
+ // than floats.
+ int speedMillimetersPerSecond = (int) (request.walkSpeed * 1000);
+
+ boolean[] perIterationResults = new boolean[travelTimesToStopsEachIteration.length];
+ int[] perIterationTravelTimes = saveTravelTimes ? new int[travelTimesToStopsEachIteration.length] : null;
+
+ // Invert the travel times to stops array, to provide better memory locality in the tight loop below. Confirmed
+ // that this provides a significant speedup, which makes sense; Java doesn't have true multidimensional arrays
+ // but rather represents int[][] as Object[int[]], which means that each of the arrays "on the inside" is stored
+ // separately in memory and may not be contiguous, also meaning the CPU can't efficiently predict and prefetch
+ // what we need next. When we invert the array, we then have all travel times to a particular stop for all iterations
+ // in a single array. The CPU will only page the data that is relevant for the current target, i.e. the travel
+ // times to nearby stops. Since we are also looping over the targets in a geographic manner (in row-major order),
+ // it is likely the stops relevant to a particular target will already be in memory from the previous target.
+ // This should not increase memory consumption very much as we're only storing the travel times to stops. The
+ // Netherlands has about 70,000 stops, if you do 1,000 iterations to 70,000 stops, the array being duplicated is
+ // only 70,000 * 1000 * 4 bytes per int ~= 267 megabytes. I don't think it will be worthwhile to change the
+ // algorithm to output already-transposed data as that will create other memory locality problems (since the
+ // pathfinding algorithm solves one iteration for all stops simultaneously).
+ int[][] invertedTravelTimesToStops = new int[travelTimesToStopsEachIteration[0].length][travelTimesToStopsEachIteration.length];
+
+ for (int iteration = 0; iteration < travelTimesToStopsEachIteration.length; iteration++) {
+ for (int stop = 0; stop < travelTimesToStopsEachIteration[0].length; stop++) {
+ invertedTravelTimesToStops[stop][iteration] = travelTimesToStopsEachIteration[iteration][stop];
+ }
+ }
+
+ for (int targetIdx = 0; targetIdx < targets.size(); targetIdx++) {
+ // clear previous results, fill with whether target is reached within the cutoff without transit (which does
+ // not vary with monte carlo draw)
+ boolean targetReachedWithoutTransit = nonTransitTravelTimesToTargets[targetIdx] < cutoffSeconds;
+ Arrays.fill(perIterationResults, targetReachedWithoutTransit);
+ if (saveTravelTimes) {
+ Arrays.fill(perIterationTravelTimes, nonTransitTravelTimesToTargets[targetIdx]);
+ }
+
+ if (targetReachedWithoutTransit && !saveTravelTimes) {
+ // if the target is reached without transit, there's no need to do any propagation as the array cannot
+ // change
+ reducer.accept(targetIdx, perIterationResults);
+ continue;
+ }
+
+ TIntIntMap pointToStopDistanceTable = targets.pointToStopDistanceTables.get(targetIdx);
+
+ // all variables used in lambdas must be "effectively final"; arrays are effectively final even if their
+ // values change
+ boolean[] targetEverReached = new boolean[] { nonTransitTravelTimesToTargets[targetIdx] <= cutoffSeconds };
+
+ // don't try to propagate transit if there are no nearby transit stops,
+ // but still call the reducer below with the non-transit times, because you can walk even where there is no
+ // transit
+ if (pointToStopDistanceTable != null) {
+ pointToStopDistanceTable.forEachEntry((stop, distanceMillimeters) -> {
+ for (int iteration = 0; iteration < perIterationResults.length; iteration++) {
+ int timeAtStop = invertedTravelTimesToStops[stop][iteration];
+
+ if (timeAtStop > cutoffSeconds || saveTravelTimes && timeAtStop > perIterationTravelTimes[iteration]) continue; // avoid overflow
+
+ int timeAtTargetThisStop = timeAtStop + distanceMillimeters / speedMillimetersPerSecond;
+
+ if (timeAtTargetThisStop < cutoffSeconds) {
+ if (saveTravelTimes) {
+ if (timeAtTargetThisStop < perIterationTravelTimes[iteration]) {
+ perIterationTravelTimes[iteration] = timeAtTargetThisStop;
+ targetEverReached[0] = true;
+ }
+ } else {
+ perIterationResults[iteration] = true;
+ targetEverReached[0] = true;
+ }
+ }
+ }
+ return true;
+ });
+ }
+
+ if (saveTravelTimes) travelTimeReducer.accept(targetIdx, perIterationTravelTimes);
+ else if (targetEverReached[0]) reducer.accept(targetIdx, perIterationResults);
+ }
+
+ long totalTimeMillis = System.currentTimeMillis() - startTimeMillis;
+ LOG.info("Propagating {} iterations from {} stops to {} targets took {}s",
+ travelTimesToStopsEachIteration.length,
+ travelTimesToStopsEachIteration[0].length,
+ targets.size(),
+ totalTimeMillis / 1000d
+ );
+ }
+
+ public void propagate (Reducer reducer) {
+ propagate(reducer, null);
+ }
+
+ public void propagateTimes (TravelTimeReducer reducer) {
+ propagate(null, reducer);
+ }
+
+ public static interface Reducer {
+ public void accept (int targetIndex, boolean[] targetReachedWithinCutoff);
+ }
+
+ public interface TravelTimeReducer {
+ public void accept (int targetIndex, int[] travelTimesForTargets);
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/profile/ProfileRequest.java b/src/main/java/com/conveyal/r5/profile/ProfileRequest.java
index dad02f187..79cb62005 100644
--- a/src/main/java/com/conveyal/r5/profile/ProfileRequest.java
+++ b/src/main/java/com/conveyal/r5/profile/ProfileRequest.java
@@ -9,6 +9,7 @@
import com.conveyal.r5.api.util.TransitModes;
import com.conveyal.r5.model.json_serialization.*;
import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import graphql.schema.DataFetchingEnvironment;
@@ -236,6 +237,7 @@ timings in the schedule. Such an option does not overlap (temporally) its altern
//If true current search is reverse search AKA we are looking for a path from destination to origin in reverse
//It differs from searchType because it is used as egress search
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
public boolean reverseSearch = false;
/** maximum fare. If nonnegative, fares will be used in routing. */
@@ -325,6 +327,20 @@ public float getSpeed(StreetMode streetMode) {
throw new IllegalArgumentException("getSpeed(): Invalid mode " + streetMode);
}
+ @JsonIgnore
+ public int getMaxAccessTime (StreetMode mode) {
+ switch (mode) {
+ case CAR:
+ return maxCarTime;
+ case BICYCLE:
+ return maxBikeTime;
+ case WALK:
+ return maxWalkTime;
+ default:
+ throw new IllegalArgumentException("Invalid mode");
+ }
+ }
+
/**
*
* @return true if there is any transitMode in transitModes (Safe to call if transitModes is null)
diff --git a/src/main/java/com/conveyal/r5/profile/Propagater.java b/src/main/java/com/conveyal/r5/profile/Propagater.java
deleted file mode 100644
index 29948dd2e..000000000
--- a/src/main/java/com/conveyal/r5/profile/Propagater.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package com.conveyal.r5.profile;
-
-import com.conveyal.r5.streets.LinkedPointSet;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Arrays;
-import java.util.function.ToIntFunction;
-import java.util.stream.IntStream;
-
-/**
- * A propagater does propagation, like how an alligator does allegation.
- *
- * This takes a matrix of times at each transit stop for each RAPTOR iteration, and propagates those times
- * to the street network. A matrix of travel times to points at each iteration can be too large in very large
- * graphs (e.g. NYC or NL), thus this works in a streaming fashion, calling a function passed into the propagate
- * call with the results of each iteration, which can then be summarized or written to disk as the caller desires.
- */
-public class Propagater {
- private static final Logger LOG = LoggerFactory.getLogger(Propagater.class);
-
- /** Times at transit stops for each iteration */
- public final int[][] travelTimesToStopsEachIteration;
-
- /** Times at targets using the street network */
- public final int[] nonTransferTravelTimesToTargets;
-
- /** The linked targets */
- public final LinkedPointSet targets;
-
- /** the profilerequest (used for walk speed etc.) */
- public final ProfileRequest request;
-
- public Propagater (int[][] travelTimesToStopsEachIteration, int[] nonTransferTravelTimesToTargets, LinkedPointSet targets, ProfileRequest request) {
- this.travelTimesToStopsEachIteration = travelTimesToStopsEachIteration;
- this.nonTransferTravelTimesToTargets = nonTransferTravelTimesToTargets;
- this.targets = targets;
- this.request = request;
- }
-
- /**
- * perform the propagation from transit stops to targets, and call the reducer with the travel time to each target
- * for each iteration.
- *
- * For large networks, making a 2-D array of all the travel times to every target is too large to fit in memory, so
- * we call the reducer function with the times for each iteration, and it can summarize on the fly, write to disk,
- * &c. as desired by the caller.
- */
- public int[] propagate (ToIntFunction reducer) {
- long startTime = System.currentTimeMillis();
- // propagate the transit times
- // NB if this is slow, parallelize; however, it's observed to be quick even in large networks, and for regional
- // analyses, the entire request is already parallelized at the origin level (i.e. we're computing the accessibility
- // for multiple origins simultaneously).
- // This can use something akin to range-RAPTOR where we only update the travel time if it changes from one
- // iteration to the next, but that requires working on clock times, so we'd have to pass more information down
- // into this function; I'm not sure it's worth it.
- int[] result = new int[travelTimesToStopsEachIteration.length];
- for (int iteration = 0; iteration < travelTimesToStopsEachIteration.length; iteration++) {
- int[] travelTimesToTargetsThisIteration = Arrays.copyOf(nonTransferTravelTimesToTargets, nonTransferTravelTimesToTargets.length);
- int[] travelTimesToStopsThisIteration = travelTimesToStopsEachIteration[iteration];
-
- // avoid float math in loop below
- // float math was previously observed to slow down this loop, however it's debatable whether that was due to
- // casts, the operations themselves, or the fact that the operations were being completed with doubles rather
- // than floats.
- int speedMillimetersPerSecond = (int) (request.walkSpeed * 1000);
-
- // Loop over each stop and propagate
- for (int stop = 0; stop < travelTimesToStopsThisIteration.length; stop++) {
- int travelTimeToStop = travelTimesToStopsThisIteration[stop];
-
- if (travelTimeToStop == RaptorWorker.UNREACHED) continue;
-
- int[] stopToTargetTree = targets.stopToPointDistanceTables.get(stop);
- if (stopToTargetTree == null) continue; // stop is unlinked
- for (int idx = 0; idx < stopToTargetTree.length; idx += 2) {
- int target = stopToTargetTree[idx];
- int distanceMillimeters = stopToTargetTree[idx + 1];
- int timeSeconds = distanceMillimeters / speedMillimetersPerSecond;
- int timeToTargetIncludingTransit = travelTimeToStop + timeSeconds;
-
- if (timeToTargetIncludingTransit < travelTimesToTargetsThisIteration[target]) {
- travelTimesToTargetsThisIteration[target] = timeToTargetIncludingTransit;
- }
- }
- }
-
- result[iteration] = reducer.applyAsInt(travelTimesToTargetsThisIteration);
- }
-
- LOG.info("Propagation from {} stops to {} targets for {} iterations took {}s",
- travelTimesToStopsEachIteration[0].length,
- targets.size(),
- travelTimesToStopsEachIteration.length,
- (System.currentTimeMillis() - startTime) / 1000d
- );
-
- return result;
- }
-}
diff --git a/src/main/java/com/conveyal/r5/profile/StatsCalculator.java b/src/main/java/com/conveyal/r5/profile/StatsCalculator.java
new file mode 100644
index 000000000..52d27a69d
--- /dev/null
+++ b/src/main/java/com/conveyal/r5/profile/StatsCalculator.java
@@ -0,0 +1,97 @@
+package com.conveyal.r5.profile;
+
+import com.conveyal.r5.api.util.Itinerary;
+import com.conveyal.r5.api.util.Stats;
+
+import java.util.Collection;
+import java.util.stream.IntStream;
+
+/**
+ * Created by matthewc on 4/4/17.
+ */
+public class StatsCalculator {
+ /**
+ * Compute statistics for this particular path over the time window. This is super simple,
+ * we just compute how long the path takes at every possible departure minute. There's probably an
+ * elegant theoretical way to do this, but I prefer pragmatism over theory.
+ */
+ public static StatsCollection computeStatistics (ProfileRequest req, int accessTime, int egressTime,
+ int nSegments, Collection itineraries) {
+ Stats stats = new Stats();
+ Stats[] rideStats = IntStream.range(0, nSegments).mapToObj(i -> new Stats()).toArray(Stats[]::new);
+ Stats[] waitStats = IntStream.range(0, nSegments).mapToObj(i -> new Stats()).toArray(Stats[]::new);
+
+ for (int start = req.fromTime; start < req.toTime; start += 60) {
+ // TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
+ int timeAtOriginStop = start + accessTime + RaptorWorker.BOARD_SLACK_SECONDS;
+ int bestTimeAtDestinationStop = Integer.MAX_VALUE;
+
+ PathWithTimes.Itinerary bestItinerary = null;
+ for (PathWithTimes.Itinerary itin : itineraries) {
+ // itinerary cannot be used at this time
+ if (itin.boardTimes[0] < timeAtOriginStop) continue;
+
+ if (itin.alightTimes[nSegments - 1] < bestTimeAtDestinationStop) {
+ bestTimeAtDestinationStop = itin.alightTimes[nSegments - 1];
+ bestItinerary = itin;
+ }
+ }
+
+ if (bestItinerary == null) continue; // cannot use this trip at this time
+
+ int bestTimeAtDestination = bestTimeAtDestinationStop + egressTime;
+
+ int travelTime = bestTimeAtDestination - start;
+
+ stats.num++;
+ stats.avg += travelTime;
+ stats.min = Math.min(stats.min, travelTime);
+ stats.max = Math.max(stats.max, travelTime);
+
+ // accumulate stats for each leg
+ for (int leg = 0; leg < nSegments; leg++) {
+ Stats ride = rideStats[leg];
+ int rideLen = bestItinerary.alightTimes[leg] - bestItinerary.boardTimes[leg];
+ ride.num++;
+ ride.avg += rideLen;
+ ride.min = Math.min(ride.min, rideLen);
+ ride.max = Math.max(ride.max, rideLen);
+
+ Stats wait = waitStats[leg];
+
+ int arriveAtStopTime = leg == 0 ? timeAtOriginStop : bestItinerary.arriveAtBoardStopTimes[leg];
+
+ int waitTime = bestItinerary.boardTimes[leg] - arriveAtStopTime;
+
+ wait.num++;
+ wait.avg += waitTime;
+ wait.min = Math.min(waitTime, wait.min);
+ wait.max = Math.max(waitTime, wait.max);
+ }
+ }
+
+ if (stats.num == 0) throw new IllegalStateException("No valid itineraries found for path computed in RaptorWorker");
+
+ stats.avg /= stats.num;
+
+ for (Stats[] statSet : new Stats[][] { rideStats, waitStats }) {
+ for (Stats s : statSet) {
+ s.avg /= s.num;
+ }
+ }
+
+ return new StatsCollection(stats, waitStats, rideStats);
+ }
+
+ public static class StatsCollection {
+ public Stats stats;
+ public Stats[] waitStats;
+ public Stats[] rideStats;
+
+ public StatsCollection (Stats stats, Stats[] waitStats, Stats[] rideStats) {
+ this.stats = stats;
+ this.rideStats = rideStats;
+ this.waitStats = waitStats;
+ }
+ }
+}
diff --git a/src/main/java/com/conveyal/r5/streets/EdgeStore.java b/src/main/java/com/conveyal/r5/streets/EdgeStore.java
index 8beaf19a2..0f45d7108 100644
--- a/src/main/java/com/conveyal/r5/streets/EdgeStore.java
+++ b/src/main/java/com/conveyal/r5/streets/EdgeStore.java
@@ -1071,9 +1071,12 @@ private EdgeStore() {
* lists.
* We don't use clone() and make sure every field copy is explicit below, avoiding any unintentional shallow-copying
* of collections or referenced data structures.
+ *
+ * @param copiedStreetLayer the copy of the street layer that this edgeStore will reference.
*/
- public EdgeStore extendOnlyCopy() {
+ public EdgeStore extendOnlyCopy(StreetLayer copiedStreetLayer) {
EdgeStore copy = new EdgeStore();
+ copy.layer = copiedStreetLayer;
copy.firstModifiableEdge = this.nEdges();
// The Edge store references a vertex store, and the StreetLayer should also hold the same reference.
// So the StreetLayer that makes this copy needs to grab a pointer to the new extend only VertexStore
diff --git a/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java b/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java
index 289a4f7af..df2aec114 100644
--- a/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java
+++ b/src/main/java/com/conveyal/r5/streets/LinkedPointSet.java
@@ -71,6 +71,12 @@ public class LinkedPointSet implements Serializable {
/** For each transit stop, the distances to nearby PointSet points as packed (point_index, distance) pairs. */
public List stopToPointDistanceTables;
+ /**
+ * For each pointset point, the stops reachable without using transit, as a map from transit stop index to distance in millimeters.
+ * Inverted version of stopToPointDistanceTables.
+ */
+ public transient List pointToStopDistanceTables;
+
/** It is preferred to specify a mode when linking TODO remove this. */
@Deprecated
public LinkedPointSet(PointSet pointSet, StreetLayer streetLayer) {
@@ -379,4 +385,29 @@ public void makeStopToPointDistanceTables(Geometry treeRebuildZone) {
counter.done();
}
+ public synchronized void makePointToStopDistanceTablesIfNeeded () {
+ if (pointToStopDistanceTables != null) return;
+
+ synchronized (this) {
+ // check again in case they were built while waiting on this synchronized block
+ if (pointToStopDistanceTables != null) return;
+ if (stopToPointDistanceTables == null) makeStopToPointDistanceTables(null);
+ TIntIntMap[] result = new TIntIntMap[size()];
+
+ for (int stop = 0; stop < stopToPointDistanceTables.size(); stop++) {
+ int[] stopToPointDistanceTable = stopToPointDistanceTables.get(stop);
+ if (stopToPointDistanceTable == null) continue;
+
+ for (int idx = 0; idx < stopToPointDistanceTable.length; idx += 2) {
+ int point = stopToPointDistanceTable[idx];
+ int distance = stopToPointDistanceTable[idx + 1];
+ if (result[point] == null) result[point] = new TIntIntHashMap();
+ result[point].put(stop, distance);
+ }
+ }
+ pointToStopDistanceTables = Arrays.asList(result);
+ }
+ }
+
+
}
diff --git a/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java b/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java
index cbbeee3b1..d6185f662 100644
--- a/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java
+++ b/src/main/java/com/conveyal/r5/streets/ParkRideRouter.java
@@ -15,8 +15,12 @@
*/
public class ParkRideRouter extends StreetRouter {
+ //Key is stop index, value is duration to reach it for getReachedStops
TIntIntMap transitStopIndexDurationMap;
+ //Key is streetVertex of stops, value is duration to reach it for getTravelTimeToVertex
+ TIntIntMap streetVertexDurationMap;
+ //Key is streetVertex, value is the state from the park-and-ride to this stop, used for making paths
TIntObjectMap transitStopStreetIndexParkRideState;
public ParkRideRouter(StreetLayer streetLayer) {
@@ -35,6 +39,7 @@ public ParkRideRouter(StreetLayer streetLayer) {
*/
public void addParks(TIntObjectMap carParks, TransitLayer transitLayer) {
transitStopIndexDurationMap = new TIntIntHashMap(carParks.size() * 3);
+ streetVertexDurationMap = new TIntIntHashMap(carParks.size() * 3);
TIntObjectMap parkRideLocationsMap = streetLayer.parkRideLocationsMap;
transitStopStreetIndexParkRideState = new TIntObjectHashMap<>(carParks.size());
final double walkSpeedMillimetersPerSecond = profileRequest.walkSpeed * 1000;
@@ -56,12 +61,14 @@ public void addParks(TIntObjectMap carParks, TransitLayer transitLayer) {
if (!transitStopIndexDurationMap.containsKey(toStop)) {
transitStopIndexDurationMap.put(toStop, totalTime);
transitStopStreetIndexParkRideState.put(stopStreetVertexIdx, pr_state);
+ streetVertexDurationMap.put(stopStreetVertexIdx, totalTime);
// ELSE we only add time and update P+R if new time is shorter then previously saved one
} else {
int savedTime = transitStopIndexDurationMap.get(toStop);
if (totalTime < savedTime) {
transitStopIndexDurationMap.put(toStop, totalTime);
transitStopStreetIndexParkRideState.put(stopStreetVertexIdx, pr_state);
+ streetVertexDurationMap.put(stopStreetVertexIdx, totalTime);
}
}
@@ -102,4 +109,18 @@ public State getStateAtVertex(int vertexIndex) {
return null;
}
}
+
+ /**
+ * Returns travel time to this vertex. Only returns time to stops, since only those times are saved
+ * @param vertexIndex index of the street vertex to query
+ * @return travel time in seconds, or Integer.MAX_VALUE if no time was saved for this vertex (i.e. it is not a reached stop)
+ */
+ @Override
+ public int getTravelTimeToVertex(int vertexIndex) {
+ if (this.streetVertexDurationMap.containsKey(vertexIndex)) {
+ return streetVertexDurationMap.get(vertexIndex);
+ } else {
+ return Integer.MAX_VALUE;
+ }
+ }
}
diff --git a/src/main/java/com/conveyal/r5/streets/StreetLayer.java b/src/main/java/com/conveyal/r5/streets/StreetLayer.java
index c7c2f61e5..81e5f1c7e 100644
--- a/src/main/java/com/conveyal/r5/streets/StreetLayer.java
+++ b/src/main/java/com/conveyal/r5/streets/StreetLayer.java
@@ -24,7 +24,6 @@
import com.vividsolutions.jts.operation.union.UnaryUnionOp;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
-import gnu.trove.map.TIntIntMap;
import gnu.trove.map.TIntObjectMap;
import gnu.trove.map.TLongIntMap;
import gnu.trove.map.hash.TIntObjectHashMap;
@@ -268,6 +267,7 @@ public void loadFromOsm(OSM osm, boolean removeIslands, boolean saveVertexIndex)
}
}
}
+ stressLabeler.logErrors();
// summarize LTS statistics
Edge cursor = edgeStore.getCursor();
@@ -592,6 +592,24 @@ else if ("to".equals(member.role)) {
else if ("via".equals(member.role)) {
via.add(member);
}
+
+ // Osmosis may produce situations where referential integrity is violated, probably at the edge of the
+ // bounding box where half a turn restriction is outside the box.
+ if (member.type == OSMEntity.Type.WAY) {
+ if (!osm.ways.containsKey(member.id)) {
+ LOG.warn("Turn restriction relation {} references nonexistent way {}, dropping this relation",
+ id,
+ member.id);
+ return;
+ }
+ } else if (member.type == OSMEntity.Type.NODE) {
+ if (!osm.nodes.containsKey(member.id)) {
+ LOG.warn("Turn restriction relation {} references nonexistent node {}, dropping this relation",
+ id,
+ member.id);
+ return;
+ }
+ }
}
@@ -922,6 +940,7 @@ void addReverseTurnRestriction(TurnRestriction turnRestriction, int index) {
/**
* Get or create mapping from a global long OSM ID to an internal street vertex ID, creating the vertex as needed.
+ * @return the internal ID for the street vertex that was found or created, or -1 if there was no such OSM node.
*/
private int getVertexIndexForOsmNode(long osmNodeId) {
int vertexIndex = vertexIndexForOsmNode.get(osmNodeId);
@@ -929,13 +948,15 @@ private int getVertexIndexForOsmNode(long osmNodeId) {
// Register a new vertex, incrementing the index starting from zero.
// Store node coordinates for this new street vertex
Node node = osm.nodes.get(osmNodeId);
- vertexIndex = vertexStore.addVertex(node.getLat(), node.getLon());
-
- VertexStore.Vertex v = vertexStore.getCursor(vertexIndex);
- if (node.hasTag("highway", "traffic_signals"))
- v.setFlag(VertexStore.VertexFlag.TRAFFIC_SIGNAL);
-
- vertexIndexForOsmNode.put(osmNodeId, vertexIndex);
+ if (node == null) {
+ LOG.warn("OSM data references an undefined node. This is often the result of extracting a bounding box in Osmosis without the completeWays option.");
+ } else {
+ vertexIndex = vertexStore.addVertex(node.getLat(), node.getLon());
+ VertexStore.Vertex v = vertexStore.getCursor(vertexIndex);
+ if (node.hasTag("highway", "traffic_signals"))
+ v.setFlag(VertexStore.VertexFlag.TRAFFIC_SIGNAL);
+ vertexIndexForOsmNode.put(osmNodeId, vertexIndex);
+ }
}
return vertexIndex;
}
@@ -981,6 +1002,10 @@ private void makeEdge(Way way, int beginIdx, int endIdx, Long osmID) {
for (int n = beginIdx; n <= endIdx; n++) {
long nodeId = way.nodes[n];
Node node = osm.nodes.get(nodeId);
+ if (node == null) {
+ LOG.warn("Not creating street segment that references an undefined node.");
+ return;
+ }
envelope.expandToInclude(node.getLon(), node.getLat());
nodes.add(node);
}
@@ -1513,7 +1538,7 @@ public StreetLayer scenarioCopy(TransportNetwork newScenarioNetwork, boolean wil
// Indicate that the content of the new StreetLayer will be changed by giving it the scenario's scenarioId.
// If the copy will not be modified, scenarioId remains unchanged to allow cached pointset linkage reuse.
copy.scenarioId = newScenarioNetwork.scenarioId;
- copy.edgeStore = edgeStore.extendOnlyCopy();
+ copy.edgeStore = edgeStore.extendOnlyCopy(copy);
// The extend-only copy of the EdgeStore also contains a new extend-only copy of the VertexStore.
copy.vertexStore = copy.edgeStore.vertexStore;
copy.temporaryEdgeIndex = new IntHashGrid();
diff --git a/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java b/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java
index 2110c8489..10fc17f41 100644
--- a/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java
+++ b/src/main/java/com/conveyal/r5/transit/DCFareCalculator.java
@@ -15,9 +15,9 @@
*/
public class DCFareCalculator {
- private static final FareTable METRORAIL = new FareTable("fares/dc/metrorail.csv");
- private static final FareTable MARC = new FareTable("fares/dc/marc.csv");
- private static final FareTable VRE = new FareTable("fares/dc/vre.csv");
+ private static final FareTable METRORAIL = new FareTable("fares/dc/metrorail.csv", true);
+ private static final FareTable MARC = new FareTable("fares/dc/marc.csv", true);
+ private static final FareTable VRE = new FareTable("fares/dc/vre.csv", true);
private static final String[] metroExpress = { "J7", "J9", "P17", "P19", "W13", "W19", "11Y", "17A", "17B", "17G",
"17H", "17K", "17L", "17M", "18E", "18G", "18H", "18P", "29E", "29G", "29H", "29X" };
@@ -40,7 +40,6 @@ public class DCFareCalculator {
private static RideType classify (RouteInfo route) {
// NOTE the agencyId string of the route's agencyAndId is not the same as the one given by route.getAgency.
// The former is the same for all routes in the feed. The latter is the true agency of the feed.
-
String agency = route.agency_id;
String agency_url = route.agency_url == null ? null : route.agency_url.toString(); // this is used in single-agency feeds so it should work
String short_name = route.route_short_name;
diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java
index 9d2ff0d25..df5ee12e3 100644
--- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java
+++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java
@@ -40,6 +40,12 @@ public class TransportNetwork implements Serializable {
public TransitLayer transitLayer;
+ /**
+ * This stores any number of lightweight scenario networks built upon the current base network.
+ * FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited.
+ */
+ public transient Map scenarios = new HashMap<>();
+
/**
* A grid point set that covers the full extent of this transport network. The PointSet itself then caches linkages
* to street networks (the baseline street network, or ones with various scenarios applied). If they have been
diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java
index 7d0b62865..1d34c2557 100644
--- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java
+++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java
@@ -6,7 +6,6 @@
import com.amazonaws.services.s3.model.S3Object;
import com.conveyal.gtfs.GTFSCache;
import com.conveyal.osmlib.OSMCache;
-import com.conveyal.r5.analyst.WebMercatorGridPointSet;
import com.conveyal.r5.analyst.cluster.BundleManifest;
import com.conveyal.r5.analyst.scenario.Scenario;
import com.conveyal.r5.common.JsonUtilities;
@@ -14,17 +13,21 @@
import com.conveyal.r5.point_to_point.builder.TNBuilderConfig;
import com.conveyal.r5.profile.ProfileRequest;
import com.conveyal.r5.streets.StreetLayer;
-import com.google.common.collect.Sets;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.cache.RemovalListener;
import com.google.common.io.ByteStreams;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
-import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -32,8 +35,6 @@
* This holds one or more TransportNetworks keyed on unique strings.
* Because (de)serialization is now much faster than building networks from scratch, built graphs are cached on the
* local filesystem and on S3 for later re-use.
- * Actually this currently only holds one single TransportNetwork, but that will eventually change.
- * FIXME the synchronization is kind of primitive and will need to be more sophisticated when a worker has multiple loaded networks.
*/
public class TransportNetworkCache {
@@ -44,11 +45,11 @@ public class TransportNetworkCache {
private final File cacheDir;
private final String sourceBucket;
+ private final String bucketFolder;
- String currentNetworkId = null;
-
- TransportNetwork currentNetwork = null;
+ private static final int DEFAULT_CACHE_SIZE = 1;
+ private final LoadingCache cache;
private final GTFSCache gtfsCache;
private final OSMCache osmCache;
@@ -59,51 +60,51 @@ public TransportNetworkCache(String sourceBucket) {
/** Create a transport network cache. If source bucket is null, will work offline. */
public TransportNetworkCache(String sourceBucket, File cacheDir) {
+ this(sourceBucket, cacheDir, DEFAULT_CACHE_SIZE, null);
+ }
+
+ public TransportNetworkCache(String sourceBucket, File cacheDir, String bucketFolder) {
+ this(sourceBucket, cacheDir, DEFAULT_CACHE_SIZE, bucketFolder);
+ }
+
+ /** Create a transport network cache. If source bucket is null, will work offline. */
+ public TransportNetworkCache(String sourceBucket, File cacheDir, int cacheSize, String bucketFolder) {
this.cacheDir = cacheDir;
this.sourceBucket = sourceBucket;
+ this.bucketFolder = bucketFolder != null ? bucketFolder.replaceAll("\\/","") : null;
+ this.cache = createCache(cacheSize);
this.gtfsCache = new GTFSCache(sourceBucket, cacheDir);
this.osmCache = new OSMCache(sourceBucket, cacheDir);
}
public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache) {
+ this(gtfsCache, osmCache, DEFAULT_CACHE_SIZE);
+ }
+
+ public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache, int cacheSize) {
+ this(gtfsCache, osmCache, cacheSize, null);
+ }
+
+ public TransportNetworkCache(GTFSCache gtfsCache, OSMCache osmCache, int cacheSize, String bucketFolder) {
this.gtfsCache = gtfsCache;
this.osmCache = osmCache;
+ this.cache = createCache(cacheSize);
this.cacheDir = gtfsCache.cacheDir;
this.sourceBucket = gtfsCache.bucket;
+ // we don't necessarily want to put r5 networks in the gtfsCache bucketFolder
+ // (e.g., "s3://bucket/gtfs"), but we'll go ahead and use the same bucket
+ this.bucketFolder = bucketFolder;
}
- /**
- * This stores any number of lightweight scenario networks built upon the current base network.
- * FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited.
- */
- private Map scenarioNetworkCache = new HashMap<>();
-
- /**
- * Return the graph for the given unique identifier for graph builder inputs on S3.
- * If this is the same as the last graph built, just return the pre-built graph.
- * If not, build the graph from the inputs, fetching them from S3 to the local cache as needed.
- */
+ /** Convenience method that returns transport network from cache. */
public synchronized TransportNetwork getNetwork (String networkId) {
-
- LOG.info("Finding or building a TransportNetwork for ID {} and R5 version {}", networkId, R5Version.version);
-
- if (networkId.equals(currentNetworkId)) {
- LOG.info("Network ID has not changed. Reusing the last one that was built.");
- return currentNetwork;
- }
-
- TransportNetwork network = checkCached(networkId);
- if (network == null) {
- LOG.info("Cached transport network for id {} and R5 version {} was not found. Building the network from scratch.",
- networkId, R5Version.version);
- network = buildNetwork(networkId);
+ try {
+ return cache.get(networkId);
+ } catch (Exception e) {
+ LOG.error("Exception while loading a transport network into the cache: {}", e.toString());
+ e.printStackTrace();
+ return null;
}
-
- currentNetwork = network;
- currentNetworkId = networkId;
- scenarioNetworkCache.clear(); // We cache only scenario graphs built upon the currently active base graph.
-
- return network;
}
/**
@@ -122,7 +123,10 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr
// The following call clears the scenarioNetworkCache if the current base graph changes.
TransportNetwork baseNetwork = this.getNetwork(networkId);
- TransportNetwork scenarioNetwork = scenarioNetworkCache.get(scenarioId);
+ if (baseNetwork.scenarios == null) {
+ baseNetwork.scenarios = new HashMap<>();
+ }
+ TransportNetwork scenarioNetwork = baseNetwork.scenarios.get(scenarioId);
// DEBUG force scenario re-application
// scenarioNetwork = null;
@@ -135,12 +139,11 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr
// resolve scenario
LOG.info("Retrieving scenario stored separately on S3 rather than in the ProfileRequest");
- String scenarioKey = String.format("%s_%s.json", networkId, scenarioId);
- File scenarioFile = new File(cacheDir, scenarioKey);
+ File scenarioFile = new File(cacheDir, getScenarioFilename(networkId, scenarioId));
if (!scenarioFile.exists()) {
try {
- S3Object obj = s3.getObject(sourceBucket, scenarioKey);
+ S3Object obj = s3.getObject(sourceBucket, getScenarioKey(networkId, scenarioId));
InputStream is = obj.getObjectContent();
OutputStream os = new BufferedOutputStream(new FileOutputStream(scenarioFile));
ByteStreams.copy(is, os);
@@ -172,18 +175,26 @@ public synchronized TransportNetwork getNetworkForScenario (String networkId, Pr
// scenario.modifications.add(0, new InactiveTripsFilter(baseNetwork, clusterRequest.profileRequest));
scenarioNetwork = scenario.applyToTransportNetwork(baseNetwork);
LOG.info("Done applying scenario. Caching the resulting network.");
- scenarioNetworkCache.put(scenario.id, scenarioNetwork);
+ baseNetwork.scenarios.put(scenario.id, scenarioNetwork);
} else {
LOG.info("Reusing cached TransportNetwork for scenario {}.", scenarioId);
}
return scenarioNetwork;
}
+ private String getScenarioFilename(String networkId, String scenarioId) {
+ return String.format("%s_%s.json", networkId, scenarioId);
+ }
+
+ private String getScenarioKey(String networkId, String scenarioId) {
+ String filename = getScenarioFilename(networkId, scenarioId);
+ return bucketFolder != null ? String.join("/", bucketFolder, filename) : filename;
+ }
+
/** If this transport network is already built and cached, fetch it quick */
private TransportNetwork checkCached (String networkId) {
try {
- String filename = networkId + "_" + R5Version.version + ".dat";
- File cacheLocation = new File(cacheDir, networkId + "_" + R5Version.version + ".dat");
+ File cacheLocation = new File(cacheDir, getR5NetworkFilename(networkId));
if (cacheLocation.exists())
LOG.info("Found locally-cached TransportNetwork at {}", cacheLocation);
else {
@@ -193,7 +204,7 @@ private TransportNetwork checkCached (String networkId) {
LOG.info("Checking for cached transport network on S3.");
S3Object tn;
try {
- tn = s3.getObject(sourceBucket, filename);
+ tn = s3.getObject(sourceBucket, getR5NetworkKey(networkId));
} catch (AmazonServiceException ex) {
LOG.info("No cached transport network was found in S3. It will be built from scratch.");
return null;
@@ -222,6 +233,16 @@ private TransportNetwork checkCached (String networkId) {
}
}
+
+ private String getR5NetworkKey(String networkId) {
+ String filename = getR5NetworkFilename(networkId);
+ return bucketFolder != null ? String.join("/", bucketFolder, filename) : filename;
+ }
+
+ private String getR5NetworkFilename(String networkId) {
+ return networkId + "_" + R5Version.version + ".dat";
+ }
+
/** If we did not find a cached network, build one */
public TransportNetwork buildNetwork (String networkId) {
@@ -247,8 +268,7 @@ public TransportNetwork buildNetwork (String networkId) {
network.rebuildLinkedGridPointSet();
// Cache the network.
- String filename = networkId + "_" + R5Version.version + ".dat";
- File cacheLocation = new File(cacheDir, networkId + "_" + R5Version.version + ".dat");
+ File cacheLocation = new File(cacheDir, getR5NetworkFilename(networkId));
try {
// Serialize TransportNetwork to local cache on this worker
@@ -256,7 +276,7 @@ public TransportNetwork buildNetwork (String networkId) {
// Upload the serialized TransportNetwork to S3
if (sourceBucket != null) {
LOG.info("Uploading the serialized TransportNetwork to S3 for use by other workers.");
- s3.putObject(sourceBucket, filename, cacheLocation);
+ s3.putObject(sourceBucket, getR5NetworkKey(networkId), cacheLocation);
LOG.info("Done uploading the serialized TransportNetwork to S3.");
} else {
LOG.info("Network saved to cache directory, not uploading to S3 while working offline.");
@@ -279,7 +299,8 @@ private TransportNetwork buildNetworkFromBundleZip (String networkId) {
if (sourceBucket != null) {
LOG.info("Downloading graph input files from S3.");
dataDirectory.mkdirs();
- S3Object graphDataZipObject = s3.getObject(sourceBucket, networkId + ".zip");
+ String networkZipKey = bucketFolder != null ? String.join("/", bucketFolder, networkId + ".zip") : networkId + ".zip";
+ S3Object graphDataZipObject = s3.getObject(sourceBucket, networkZipKey);
ZipInputStream zis = new ZipInputStream(graphDataZipObject.getObjectContent());
try {
ZipEntry entry;
@@ -329,13 +350,14 @@ private TransportNetwork buildNetworkFromBundleZip (String networkId) {
* It contains the unique IDs of the GTFS feeds and OSM extract.
*/
private TransportNetwork buildNetworkFromManifest (String networkId) {
- String manifestFileName = GTFSCache.cleanId(networkId) + ".json";
+ String manifestFileName = getManifestFilename(networkId);
File manifestFile = new File(cacheDir, manifestFileName);
// TODO handle manifest not in S3
if (!manifestFile.exists() && sourceBucket != null) {
LOG.info("Manifest file not found locally, downloading from S3");
- s3.getObject(new GetObjectRequest(sourceBucket, manifestFileName), manifestFile);
+ String manifestKey = getManifestKey(networkId);
+ s3.getObject(new GetObjectRequest(sourceBucket, manifestKey), manifestFile);
}
BundleManifest manifest;
@@ -369,17 +391,77 @@ private TransportNetwork buildNetworkFromManifest (String networkId) {
network.rebuildTransientIndexes();
- new TransferFinder(network).findTransfers();
+ TransferFinder transferFinder = new TransferFinder(network);
+ transferFinder.findTransfers();
+ transferFinder.findParkRideTransfer();
+
+ return network;
+ }
+
+ private String getManifestKey(String networkId) {
+ String filename = getManifestFilename(networkId);
+ return bucketFolder != null ? String.join("/", bucketFolder, filename) : filename;
+ }
+
+ private String getManifestFilename(String networkId) {
+ return GTFSCache.cleanId(networkId) + ".json";
+ }
+
+ private LoadingCache createCache(int size) {
+ RemovalListener removalListener = removalNotification -> {
+ String id = removalNotification.getKey();
+
+ // delete local files ONLY if using s3
+ if (sourceBucket != null) {
+ String[] extensions = {".db", ".db.p", ".zip"};
+ // delete local cache files (including zip) when feed removed from cache
+ for (String type : extensions) {
+ File file = new File(cacheDir, id + type);
+ file.delete();
+ }
+ }
+ };
+ return CacheBuilder.newBuilder()
+ .maximumSize(size)
+ .removalListener(removalListener)
+ .build(new CacheLoader() {
+ public TransportNetwork load(Object s) throws Exception {
+ // Thanks, java, for making me use a cast here. Even if I gave generic type arguments to new CacheLoader,
+ // due to type erasure it still couldn't verify that I'm using the types correctly.
+ return loadNetwork((String) s);
+ }
+ });
+ }
+
+ /**
+ * Return the graph for the given unique identifier for graph builder inputs on S3.
+ * If this is the same as the last graph built, just return the pre-built graph.
+ * If not, build the graph from the inputs, fetching them from S3 to the local cache as needed.
+ */
+ private TransportNetwork loadNetwork(String networkId) {
+
+ LOG.info("Finding or building a TransportNetwork for ID {} and R5 version {}", networkId, R5Version.version);
+
+ TransportNetwork network = checkCached(networkId);
+ if (network == null) {
+ LOG.info("Cached transport network for id {} and R5 version {} was not found. Building the network from scratch.",
+ networkId, R5Version.version);
+ network = buildNetwork(networkId);
+ }
+ cache.put(networkId, network);
return network;
}
public Set getLoadedNetworkIds() {
- if (currentNetwork == null) return Collections.emptySet();
- else return Sets.newHashSet(currentNetwork.scenarioId);
+ return cache.asMap().keySet();
}
public Set getAppliedScenarios() {
- return scenarioNetworkCache.keySet();
+ return cache.asMap().values().stream()
+ .filter(network -> network.scenarios != null)
+ .map(network -> network.scenarios.keySet())
+ .flatMap(Collection::stream)
+ .collect(Collectors.toSet());
}
}
diff --git a/src/main/java/com/conveyal/r5/transit/fare/FareTable.java b/src/main/java/com/conveyal/r5/transit/fare/FareTable.java
index f60e52a24..b70d75a0f 100644
--- a/src/main/java/com/conveyal/r5/transit/fare/FareTable.java
+++ b/src/main/java/com/conveyal/r5/transit/fare/FareTable.java
@@ -22,7 +22,14 @@ public class FareTable {
private Map, Fare> fares = Maps.newHashMap();
+ private boolean ignoreAgencyId;
+
public FareTable (String name) {
+ this(name, false);
+ }
+
+ public FareTable (String name, boolean ignoreAgencyId) {
+ this.ignoreAgencyId = ignoreAgencyId;
InputStream is = FareTable.class.getClassLoader().getResourceAsStream(name);
CsvReader reader = new CsvReader(is, ',', Charset.forName("UTF-8"));
try {
@@ -42,12 +49,17 @@ public FareTable (String name) {
}
public Fare lookup (String from, String to) {
- return new Fare(fares.get(new P2(from, to))); // defensive copy, in case the caller discounts
+ return new Fare(fares.get(new P2<>(from, to))); // defensive copy, in case the caller discounts
}
public Fare lookup (Stop from, Stop to) {
- //TODO: how does this works WRT different GTFS feeds having Stops with same Ids?
- return lookup(from.stopId, to.stopId);
+ if (this.ignoreAgencyId) {
+ String fromWithoutFeedId = from.stopId.split(":", 2)[1];
+ String toWithoutFeedId = to.stopId.split(":", 2)[1];
+ return lookup(fromWithoutFeedId, toWithoutFeedId);
+ } else {
+ return lookup(from.stopId, to.stopId);
+ }
}
}
diff --git a/src/main/java/com/conveyal/r5/util/P2.java b/src/main/java/com/conveyal/r5/util/P2.java
index 1d353ab6a..3a737cace 100644
--- a/src/main/java/com/conveyal/r5/util/P2.java
+++ b/src/main/java/com/conveyal/r5/util/P2.java
@@ -6,18 +6,41 @@
/**
* Tuple of two elements with same type
*/
-public class P2 extends Pair {
+public class P2 {
+ public final E a;
+
+ public final E b;
+
/**
* Creates a new pair
*
- * @param key The key for this pair
- * @param value The value to use for this pair
+ * @param a The key for this pair
+ * @param b The value to use for this pair
*/
public P2(
- @NamedArg("key")
- E key,
- @NamedArg("value")
- E value) {
- super(key, value);
+ E a,
+ E b) {
+ this.a = a;
+ this.b = b;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("P2<%s %s>", a, b);
+ }
+
+ @Override
+ public int hashCode() {
+ return (a != null ? a.hashCode() : 0) +
+ (b != null ? b.hashCode() * 31 : 0);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if(!(o instanceof P2)) return false;
+ P2 other = (P2) o;
+ boolean aIsEqual = (a == null) ? other.a == null : a.equals(other.a);
+ boolean bIsEqual = (b == null) ? other.b == null : b.equals(other.b);
+ return aIsEqual && bIsEqual;
}
}
diff --git a/src/main/resources/worker.sh b/src/main/resources/worker.sh
new file mode 100644
index 000000000..33f4314b5
--- /dev/null
+++ b/src/main/resources/worker.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# Downloads and runs an analyst worker.
+# This shell script undergoes variable substitution via the Java MessageFormat class before being passed to newly
+# started worker machines via the AWS EC2 user data. MessageFormat will replace special tokens (consisting of numbers
+# inside curly braces) with configuration information specific to the worker being started. These are:
+# 0: the URL to grab the worker JAR from
+# 1: the AWS log group to use
+# 2: the worker configuration to use
+# If you are reading this comment inside the EC2 user data field, this variable substitution has already happened.
+# The string instance_id in curly brackets is substituted by EC2 at startup, not by our Java code. It and any shell
+# variable references that contain brackets are single-quoted to tell Java's MessageFormat not to substitute them.
+
+# prep the system: install log agent, java
+yum -y install awslogs java-1.8.0-openjdk
+
+# first things first: set up logging
+LOGFILE=/var/log/analyst-worker.log
+
+echo Starting analyst worker at `date` > $LOGFILE
+
+# make it so that the worker can write to the logfile
+chown ec2-user:ec2-user $LOGFILE
+chmod 664 $LOGFILE # Log agent needs to read log file
+
+# using a shell "herefile" or "heredoc", pipe the data between < /etc/awslogs/awslogs.conf < /etc/awslogs/awscli.conf < ~ec2-user/worker.conf <> $LOGFILE 2>&1
+
+# Figure out how much memory to give the worker in kilobytes
+TOTAL_MEM=`grep MemTotal /proc/meminfo | sed 's/[^0-9]//g'`
+# 2097152 kb is 2GB, leave that much for the OS
+MEM=`echo $TOTAL_MEM - 2097152 | bc`
+
+# Start the worker
+# run in ec2-user's home directory, in the subshell
+{
+ cd ~ec2-user
+ sudo -u ec2-user java8 -Xmx$'{MEM}'k -jar r5.jar worker worker.conf >> $LOGFILE 2>&1
+
+ # If the worker exits or doesn't start, wait a few minutes so that the CloudWatch log agent grabs
+ # the logs
+ sleep 120
+ halt -p
+} &