Merge branch 'master' into turnRestrictionRoadSplit

conveyal · Jul 27, 2017 · 35342b9 · 35342b9
2 parents e624c5e + 7e79ccb
commit 35342b9
Show file tree

Hide file tree

Showing 49 changed files with 2,061 additions and 612 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -43,6 +43,12 @@ after_success:
   # Upload the shaded jar. It's named with git describe, so begins with the last tag name, so begins with the letter v.
   # On the other hand, the main un-shaded artifact will begin with the project name 'r5'.
   - aws s3 cp --recursive --exclude "*" --include "v*.jar" /home/travis/build/conveyal/r5/target s3://r5-builds
+  # copy Javadoc to S3
+  # The syntax below sets JAVADOC_TARGET to TRAVIS_TAG if defined otherwise TRAVIS_BRANCH
+  # If TRAVIS_TAG is defined TRAVIS_BRANCH will be undefined pr travis docs, but travis will run two
+  # builds: one for the tag and one for the commit to the branch.
+  - JAVADOC_TARGET=${TRAVIS_TAG:-$TRAVIS_BRANCH}
+  - if [[ "$TRAVIS_PULL_REQUEST" = false ]]; then mvn javadoc:javadoc; aws s3 cp --cache-control max-age=300 --recursive /home/travis/build/conveyal/r5/target/site/apidocs s3://javadoc.conveyal.com/r5/${TRAVIS_BRANCH}/; fi
 
 sudo: true # sudo: true gets us more memory, which we need, at the expense of slower builds
 
@@ -55,4 +61,3 @@ cache:
 
 notifications:
   slack: conveyal:vqrkN7708qU1OTL9XkbMqQFV
-
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2014-2016 Conveyal
+Copyright (c) 2014-2017 Conveyal
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -9,6 +9,8 @@ on this particular point [here](http://conveyal.com/blog/2015/05/04/variation-in
 
 Please follow the Conveyal Java style guide at https://github.com/conveyal/JavaStyle/
 
+Javadoc for the project is built automatically after every change and published at http://javadoc.conveyal.com/r5/master/
+
 ## History
 
 R<sup>5</sup> grew out of several open-source projects. The focus on analytic applications, and the core development team behind R<sup>5</sup>,

diff --git a/pom.xml b/pom.xml
@@ -8,7 +8,7 @@
 
   <groupId>com.conveyal</groupId>
   <artifactId>r5</artifactId>
-  <version>2.2.0-SNAPSHOT</version>
+  <version>2.5.0-SNAPSHOT</version>
   <packaging>jar</packaging>
 
   <licenses>
@@ -72,6 +72,7 @@
         <filtering>true</filtering>
         <includes>
           <include>**/*.properties</include>
+          <include>worker.sh</include>
           <include>debug-plan/**</include>
           <include>fares/**</include>
         </includes>

diff --git a/src/main/java/com/conveyal/r5/R5Main.java b/src/main/java/com/conveyal/r5/R5Main.java
@@ -17,6 +17,13 @@
  */
 public class R5Main {
     public static void main (String... args) throws Exception {
+        System.out.println("__________________\n" +
+                "___  __ \\__  ____/\n" +
+                "__  /_/ /_____ \\  \n" +
+                "_  _, _/ ____/ /  \n" +
+                "/_/ |_| /_____/   \n" +
+                "                ");
+
         // Pull argument 0 off as the sub-command,
         // then pass the remaining args (1..n) on to that subcommand.
         String command = args[0];

diff --git a/.../java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java b/.../java/com/conveyal/r5/analyst/BootstrapPercentileHypothesisTestGridStatisticComputer.java
@@ -0,0 +1,85 @@
+package com.conveyal.r5.analyst;
+
+/**
+ * Compute p-values that the two regional analysis results differ due to systematic variation (change in transit network,
+ * percentile of interest, land use, time window, etc.) rather than due to random variation from the Monte Carlo search
+ * process. This uses the confidence interval method of bootstrap hypothesis testing described on page 214 of Efron and
+ * Tibshirani (1993). We perform a two-tailed test, so we are not making an assumptions about whether the change was positive
+ * or negative; a significant value indicates that there was a change, and that the sign is correct.
+ *
+ * This technique works by constructing a confidence interval that has one end at 0 and the other end where it may lie to
+ * make the confidence interval symmetric (i.e. the same amount of density in both tails). We use the percentile method
+ * to calculate this confidence interval; while we are aware there are issues with this, the better bootstrap confidence
+ * interval methods require us to store additional data so we can compute a jackknife estimate of acceleration (Efron
+ * and Tibshirani 1993, ch. 14).
+ *
+ * While it might initially seem that we could use the permutation test described earlier in that chapter, we in fact cannot.
+ * It attractively promises to compare two distributions, which is exactly what we have, but it promises to compare some
+ * statistic of those distributions. We're not interested in the difference in means of the sampling distributions, we're
+ * interested in the the difference of the full sampling distribution. To use the permutation test, we would have to apply
+ * it one level up the stack, when computing the regional results, and compute two regional runs simultaneously so we could
+ * permute them when we still had the travel times for all iterations in memory.
+ *
+ * References
+ * Efron, B., & Tibshirani, R. J. (1993). An Introduction to the Bootstrap. Boca Raton, FL: Chapman and Hall/CRC.
+ */
+public class BootstrapPercentileHypothesisTestGridStatisticComputer extends DualGridStatisticComputer {
+    @Override
+    protected double computeValuesForOrigin(int x, int y, int[] aValues, int[] bValues) {
+        // compute the value
+        int nBelowZero = 0;
+        int nZero = 0;
+        int nAboveZero = 0;
+        int nTotal = 0;
+
+        // get the point estimate of the difference
+        int pointEstimate = bValues[0] - aValues[0];
+        if (pointEstimate == 0) return 0; // no difference, not statistically significant
+
+        // subtract every value in b from every value in a
+        // this creates a bootstrapped sampling distribution of the differences, since each bootstrap sample in each analysis
+        // is independent of all others (we've taken a lot of care to ensure this is the case).
+        for (int aIdx = 1; aIdx < aValues.length; aIdx++) {
+            // TODO a and b values are used more than once. This doesn't create bootstrap dependence, correct?
+            int aVal = aValues[aIdx];
+            for (int bIdx = 1; bIdx < bValues.length; bIdx++, nTotal++) {
+                int bVal = bValues[bIdx];
+                int difference = bVal - aVal;
+
+                if (difference > 0) nAboveZero++;
+                else if (difference < 0) nBelowZero++;
+                else nZero++;
+            }
+        }
+
+        double pVal;
+
+        // if the point estimate was less than zero, assume the confidence interval is less than zero
+        // This could be wrong if the accessibility does not lie on the same side of zero as the majority of the density.
+        // TODO Efron and Tibshirani don't really discuss how to handle that case, and in particular don't discuss two-
+        // tailed tests at all. 
+        if (pointEstimate < 0) {
+            // compute the density that lies at or above zero. We have changed the technique slightly from what is
+            // described in Efron and Tibshirani 1993 to explicitly handle values that are exactly 0 (important because
+            // we are working with discretized, integer data).
+            double densityAtOrAboveZero = (double) (nZero + nAboveZero) / nTotal;
+            // two tailed test, take this much density off the other end as well, and that's the p-value for this test.
+            pVal = 2 * densityAtOrAboveZero;
+        } else {
+            // compute the density that lies at or below zero
+            double densityAtOrBelowZero = (double) (nBelowZero + nZero) / nTotal;
+            // two tailed test
+            pVal = 2 * densityAtOrBelowZero;
+        }
+
+        // fix up case where point est does not lie on same side of 0 as majority of density.
+        if (pVal > 1) pVal = 1;
+
+        // scale probability to 0 - 100,000
+        // note that the return value can actually be less than 0 if a majority of the density lies on the
+        // opposite side of zero from the point estimate. Maybe we shouldn't be looking at the point estimate
+        // NB for historical reasons, the probability grids show the probability of change, not the probability of no
+        // change (i.e. the p-value). Invert the values (i.e. replace p with alpha)
+        return (1 - pVal) * 1e5;
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/DualGridStatisticComputer.java
@@ -0,0 +1,130 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * A DualGridStatisticComputer computes statistics based on two grids.
+ */
+public abstract class DualGridStatisticComputer {
+    private static final AmazonS3 s3 = new AmazonS3Client();
+    /** Version of the access grid format we read */
+    private static final int ACCESS_GRID_VERSION = 0;
+
+    /**
+     * Calculate the probability at each origin that a random individual sample from regional analysis B is larger than one from regional
+     * analysis A. We do this empirically and exhaustively by for each origin looping over every possible combination of
+     * samples and taking a difference, then evaluating the number that yielded results greater than zero.
+     *
+     * The regional analysis access grids must be of identical size and zoom level, and a Grid object (the same as is used
+     * for destination grids) will be returned, with probabilities scaled from 0 to 100,000.
+     */
+    public Grid computeImprovementProbability (String resultBucket, String regionalAnalysisAKey, String regionalAnalysisBKey) throws IOException {
+        S3Object aGrid = s3.getObject(resultBucket, regionalAnalysisAKey);
+        S3Object bGrid = s3.getObject(resultBucket, regionalAnalysisBKey);
+        return computeImprovementProbability(aGrid.getObjectContent(), bGrid.getObjectContent());
+    }
+
+    public Grid computeImprovementProbability(InputStream a, InputStream b) throws IOException {
+        LittleEndianDataInputStream aIn = new LittleEndianDataInputStream(new GZIPInputStream(a));
+        LittleEndianDataInputStream bIn = new LittleEndianDataInputStream(new GZIPInputStream(b));
+
+        validateHeaderAndVersion(aIn);
+        validateHeaderAndVersion(bIn);
+
+        int aZoom = aIn.readInt();
+        int aWest = aIn.readInt();
+        int aNorth = aIn.readInt();
+        int aWidth = aIn.readInt();
+        int aHeight = aIn.readInt();
+
+        int bZoom = bIn.readInt();
+        int bWest = bIn.readInt();
+        int bNorth = bIn.readInt();
+        int bWidth = bIn.readInt();
+        int bHeight = bIn.readInt();
+
+        if (aZoom != bZoom ||
+                aWest != bWest ||
+                aNorth != bNorth ||
+                aWidth != bWidth ||
+                aHeight != bHeight) {
+            throw new IllegalArgumentException("Grid sizes for comparison must be identical!");
+        }
+
+        // number of iterations need not be equal, the computed probability is still valid even if they are not
+        // as the probability of choosing any particular sample is still uniform within each scenario.
+        int aIterations = aIn.readInt();
+        int bIterations = bIn.readInt();
+
+        Grid out = new Grid(aZoom, aWidth, aHeight, aNorth, aWest);
+
+        // pixels are in row-major order, iterate over y on outside
+        for (int y = 0; y < aHeight; y++) {
+            for (int x = 0; x < aWidth; x++) {
+                int[] aValues = new int[aIterations];
+                int[] bValues = new int[bIterations];
+
+                for (int iteration = 0, val = 0; iteration < aIterations; iteration++) {
+                    aValues[iteration] = (val += aIn.readInt());
+                }
+
+                for (int iteration = 0, val = 0; iteration < bIterations; iteration++) {
+                    bValues[iteration] = (val += bIn.readInt());
+                }
+
+                out.grid[x][y] = computeValuesForOrigin(x, y, aValues, bValues);
+            }
+        }
+
+        return out;
+    }
+
+    private static void validateHeaderAndVersion(LittleEndianDataInputStream input) throws IOException {
+        char[] header = new char[8];
+        for (int i = 0; i < 8; i++) {
+            header[i] = (char) input.readByte();
+        }
+
+        if (!"ACCESSGR".equals(new String(header))) {
+            throw new IllegalArgumentException("Input not in access grid format!");
+        }
+
+        int version = input.readInt();
+
+        if (version != ACCESS_GRID_VERSION) {
+            throw new IllegalArgumentException(String.format("Version mismatch of access grids, expected %s, found %s", ACCESS_GRID_VERSION, version));
+        }
+    }
+
+    /** Given the origin coordinates and the values from the two grids, compute a value for the output grid */
+    protected abstract double computeValuesForOrigin (int x, int y, int[] aValues, int[] bValues);
+
+    public static void main (String... args) throws IOException {
+        DualGridStatisticComputer comp;
+        if ("--two-tailed".equals(args[0])) {
+            comp = new BootstrapPercentileHypothesisTestGridStatisticComputer();
+        } else {
+            throw new RuntimeException("Unknown grid statistic computer " + args[0]);
+        }
+
+        FileInputStream a = new FileInputStream(args[1]);
+        FileInputStream b = new FileInputStream(args[2]);
+        Grid grid = comp.computeImprovementProbability(a, b);
+
+        if (args[3].endsWith(".grid"))
+            grid.write(new FileOutputStream(args[3]));
+        else if (args[3].endsWith(".png"))
+            grid.writePng(new FileOutputStream(args[3]));
+        else if (args[3].endsWith(".tif"))
+            grid.writeGeotiff(new FileOutputStream(args[3]));
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java b/src/main/java/com/conveyal/r5/analyst/ExtractingGridStatisticComputer.java
@@ -0,0 +1,38 @@
+package com.conveyal.r5.analyst;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.model.S3Object;
+import com.conveyal.r5.analyst.scenario.GridStatisticComputer;
+import com.google.common.io.LittleEndianDataInputStream;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Access grids are three-dimensional arrays, with the first two dimensions consisting of x and y coordinates of origins
+ * within the regional analysis, and the third dimension reflects multiple values of the indicator of interest. This could
+ * be instantaneous accessibility results for each Monte Carlo draw when computing average instantaneous accessibility (i.e.
+ * Owen-style accessibility), or it could be multiple bootstrap replications of the sampling distribution of accessibility
+ * given median travel time (see Conway, M. W., Byrd, A. and van Eggermond, M. "A Statistical Approach to Comparing
+ * Accessibility Results: Including Uncertainty in Public Transport Sketch Planning," paper presented at the 2017 World
+ * Symposium of Transport and Land Use Research, Brisbane, QLD, Australia, Jul 3-6.)
+ *
+ * An ExtractingGridStatisticComputer simply grabs the value at a particular index within each origin.
+ * When storing bootstrap replications of travel time, we also store the point estimate (using all Monte Carlo draws
+ * equally weighted) as the first value, so an ExtractingGridStatisticComputer(0) can be used to retrieve the point estimate.
+ */
+public class ExtractingGridStatisticComputer extends GridStatisticComputer {
+    public final int index;
+
+    /** Initialize with the index to extract */
+    public ExtractingGridStatisticComputer (int index) {
+        this.index = index;
+    }
+
+    @Override
+    protected double computeValueForOrigin(int x, int y, int[] valuesThisOrigin) {
+        return valuesThisOrigin[index];
+    }
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java
@@ -122,12 +122,24 @@ public Grid (int zoom, int width, int height, int north, int west) {
         this.grid = new double[width][height];
     }
 
+    /**
+     * Version of getPixelWeights which returns the weights as relative to the total area of the input geometry (i.e.
+     * the weight at a pixel is the proportion of the input geometry that falls within that pixel.
+     */
+    public TObjectDoubleMap<int[]> getPixelWeights (Geometry geometry) {
+        return getPixelWeights(geometry, false);
+    }
+
     /**
      * Get the proportions of an input polygon feature that overlap each grid cell, in the format [x, y] => weight.
      * This weight object can then be fed into the incrementFromPixelWeights function to actually burn a polygon into the
      * grid.
+     *
+     * If relativeToPixels is true, the weights are the proportion of the pixel that is covered. Otherwise they are the
+     * portion of this polygon which is within the given grid cell. If using incrementPixelWeights, this should be set to
+     * false.
      */
-    public TObjectDoubleMap<int[]> getPixelWeights (Geometry geometry) {
+    public TObjectDoubleMap<int[]> getPixelWeights (Geometry geometry, boolean relativeToPixels) {
         // No need to convert to a local coordinate system
         // Both the supplied polygon and the web mercator pixel geometries are left in WGS84 geographic coordinates.
         // Both are distorted equally along the X axis at a given latitude so the proportion of the geometry within
@@ -151,7 +163,8 @@ public TObjectDoubleMap<int[]> getPixelWeights (Geometry geometry) {
 
                 Geometry pixel = getPixelGeometry(x + west, y + north, zoom);
                 Geometry intersection = pixel.intersection(geometry);
-                double weight = intersection.getArea() / area;
+                double denominator = relativeToPixels ? pixel.getArea() : area;
+                double weight = intersection.getArea() / denominator;
                 weights.put(new int[] { x, y }, weight);
             }
         }
@@ -345,20 +358,34 @@ public void writeShapefile (String fileName, String fieldName) {
 
     /* functions below from http://wiki.openstreetmap.org/wiki/Slippy_map_tilenames#Mathematics */
 
+    /** Return the pixel the given longitude falls within */
     public static int lonToPixel (double lon, int zoom) {
         return (int) ((lon + 180) / 360 * Math.pow(2, zoom) * 256);
     }
 
+    /** return the west side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
     public static double pixelToLon (double pixel, int zoom) {
         return pixel / (Math.pow(2, zoom) * 256) * 360 - 180;
     }
 
+    /** Return the longitude of the center of the given pixel */
+    public static double pixelToCenterLon (int pixel, int zoom) {
+        return pixelToLon(pixel + 0.5, zoom);
+    }
+
+    /** Return the pixel the given latitude falls within */
     public static int latToPixel (double lat, int zoom) {
         double latRad = FastMath.toRadians(lat);
         return (int) ((1 - log(tan(latRad) + 1 / cos(latRad)) / Math.PI) * Math.pow(2, zoom - 1) * 256);
     }
 
-    // We're using FastMath here, because the built-in math functions were taking a laarge amount of time in profiling.
+    /** Return the latitude of the center of the given pixel */
+    public static double pixelToCenterLat (int pixel, int zoom) {
+        return pixelToLat(pixel + 0.5, zoom);
+    }
+
+    // We're using FastMath here, because the built-in math functions were taking a large amount of time in profiling.
+    /** return the north side of the given pixel (assuming an integer pixel; noninteger pixels will return the appropriate location within the pixel) */
     public static double pixelToLat (double pixel, int zoom) {
         return FastMath.toDegrees(atan(sinh(Math.PI - (pixel / 256d) / Math.pow(2, zoom) * 2 * Math.PI)));
     }