Merge pull request #151 from UDST/dev

Finalizing Pandana v0.6 release
UDST · Nov 25, 2020 · f76cbe6 · f76cbe6
2 parents 740c1d7 + 20af9a5
commit f76cbe6
Show file tree

Hide file tree

Showing 14 changed files with 157 additions and 52 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,20 +1,27 @@
+v0.6
+====
+
+2020/11/20
+
+* Adds vectorized, multi-threaded calculation of many shortest path routes at once
+* Restores usability of network.plot() by eliminating usage of Matplotlib's deprecated Basemap toolkit
+
 v0.5.1
 ======
 
 2020/08/05
 
-* Fixes a performance bug in network.get_node_ids()
+* Fixes a performance regression in network.get_node_ids()
 
 v0.5
 ====
 
 2020/07/28
 
-* Adds support for calculating shortest path lengths between arbitrary origins and destinations, with vectorization and multi-threading
+* Adds support for calculating shortest path distances between arbitrary origins and destinations, with vectorization and multi-threading
 * Restores alternate names for aggregation types, which were inadvertently removed in v0.4
 * Fixes a bug with matplotlib backends
 * Improves compilation on macOS 10.15 Catalina
-* Eliminates the scikit-learn dependency
 * Makes matplotlib and osmnet dependencies optional
 * Revises the documentation and demo notebook
 

diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 
 Pandana is a Python library for network analysis that uses [contraction hierarchies](https://en.wikipedia.org/wiki/Contraction_hierarchies) to calculate super-fast travel accessibility metrics and shortest paths. The numerical code is in C++.
 
-v0.5 adds vectorized calculation of shortest path lengths: [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths). 
+New in v0.5 and v0.6 is vectorized, multi-threaded calculation of shortest path routes and distances: [network.shortest_paths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_paths), [network.shortest_path_lengths()](http://udst.github.io/pandana/network.html#pandana.network.Network.shortest_path_lengths). 
 
 Documentation: http://udst.github.io/pandana
 

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
@@ -1,23 +1,30 @@
 Change log
 ==========
 
+v0.6
+----
+
+2020/11/20
+
+* Adds vectorized, multi-threaded `calculation of many shortest path routes <network.html#pandana.network.Network.shortest_paths>`_ at once
+* Restores usability of `network.plot() <network.html#pandana.network.Network.plot>`_ by eliminating usage of Matplotlib's deprecated Basemap toolkit
+
 v0.5.1
 ------
 
 2020/08/05
 
-* Fixes a performance bug in network.get_node_ids()
+* Fixes a performance regression in `network.get_node_ids() <network.html#pandana.network.Network.get_node_ids>`_
 
 v0.5
 ----
 
 2020/07/28
 
-* Adds support for `calculating shortest path lengths <network.html#pandana.network.Network.shortest_path_lengths>`_ between arbitrary origins and destinations, with vectorization and multi-threading
+* Adds support for `calculating shortest path distances <network.html#pandana.network.Network.shortest_path_lengths>`_ between arbitrary origins and destinations, with vectorization and multi-threading
 * Restores alternate names for aggregation types, which were inadvertently removed in v0.4
 * Fixes a bug with matplotlib backends
 * Improves compilation on macOS 10.15 Catalina
-* Eliminates the scikit-learn dependency
 * Makes matplotlib and osmnet dependencies optional
 * Revises the documentation and demo notebook
 

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -57,9 +57,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.5.1'
+version = '0.6'
 # The full version, including alpha/beta/rc tags.
-release = '0.5.1'
+release = '0.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -8,7 +8,7 @@ Pandana
 
 Pandana is a Python library for network analysis that uses `contraction hierarchies <https://en.wikipedia.org/wiki/Contraction_hierarchies>`_ to calculate super-fast travel accessibility metrics and shortest paths. The numerical code is in C++.
 
-v0.5.1, released August 5, 2020
+v0.6, released November 20, 2020
 
 
 Acknowledgments

diff --git a/examples/shortest_path_example.py b/examples/shortest_path_example.py
@@ -71,16 +71,26 @@
 print(net.shortest_path_length(nodes_a[1],nodes_b[1]))
 
 print('Repeat with vectorized calculations:')
+print(net.shortest_paths(nodes_a[0:2],nodes_b[0:2]))
 print(net.shortest_path_lengths(nodes_a[0:2],nodes_b[0:2]))
 
 # Performance comparison
 print('Performance comparison for 10k distance calculations:')
 
+t0 = time.time()
+for i in range(n):
+    _ = net.shortest_path(nodes_a[i], nodes_b[i])
+print('Route loop time = {} sec'.format(time.time() - t0))
+
+t0 = time.time()
+_ = net.shortest_paths(nodes_a, nodes_b)
+print('Route vectorized time = {} sec'.format(time.time() - t0))
+
 t0 = time.time()
 for i in range(n):
     _ = net.shortest_path_length(nodes_a[i], nodes_b[i])
-print('Loop time = {} sec'.format(time.time() - t0))
+print('Distance loop time = {} sec'.format(time.time() - t0))
 
 t0 = time.time()
 _ = net.shortest_path_lengths(nodes_a, nodes_b)
-print('Vectorized time = {} sec'.format(time.time() - t0))
+print('Distance vectorized time = {} sec'.format(time.time() - t0))
diff --git a/pandana/__init__.py b/pandana/__init__.py
@@ -1,3 +1,3 @@
 from .network import Network
 
-version = __version__ = '0.5.1'
+version = __version__ = '0.6'
diff --git a/pandana/loaders/tests/test_osm.py b/pandana/loaders/tests/test_osm.py
@@ -92,10 +92,10 @@ def test_node_query(bbox2):
     tags = '"amenity"="restaurant"'
     cafes = osm.node_query(*bbox2, tags=tags)
 
-    assert len(cafes) == 4
+    assert len(cafes) == 2
     assert 'lat' in cafes.columns
     assert 'lon' in cafes.columns
-    assert cafes['name'][2965338413] == 'Koja Kitchen'
+    assert cafes['name'][1419597327] == 'Cream'
 
 
 def test_node_query_raises():

diff --git a/pandana/network.py b/pandana/network.py
@@ -199,6 +199,45 @@ def shortest_path(self, node_a, node_b, imp_name=None):
         # map back to external node ids
         return self.node_ids.values[path]
 
+    def shortest_paths(self, nodes_a, nodes_b, imp_name=None):
+        """
+        Vectorized calculation of shortest paths. Accepts a list of origins
+        and list of destinations and returns a corresponding list of
+        shortest path routes. Must provide an impedance name if more than
+        one is available.
+
+        Added in Pandana v0.6.
+
+        Parameters
+        ----------
+        nodes_a : list-like of ints
+            Source node ids
+        nodes_b : list-like of ints
+            Corresponding destination node ids
+        imp_name : string
+            The impedance name to use for the shortest path
+
+        Returns
+        -------
+        paths : list of np.ndarray
+            Nodes traversed in each shortest path
+
+        """
+        if len(nodes_a) != len(nodes_b):
+            raise ValueError("Origin and destination counts don't match: {}, {}"
+                             .format(len(nodes_a), len(nodes_b)))
+
+        # map to internal node indexes
+        nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values
+        nodes_b_idx = self._node_indexes(pd.Series(nodes_b)).values
+
+        imp_num = self._imp_name_to_num(imp_name)
+
+        paths = self.net.shortest_paths(nodes_a_idx, nodes_b_idx, imp_num)
+
+        # map back to external node ids
+        return [self.node_ids.values[p] for p in paths]
+
     def shortest_path_length(self, node_a, node_b, imp_name=None):
         """
         Return the length of the shortest path between two node ids in the
@@ -208,6 +247,8 @@ def shortest_path_length(self, node_a, node_b, imp_name=None):
         If you have a large number of paths to calculate, don't use this
         function! Use the vectorized one instead.
 
+        Added in Pandana v0.5.
+
         Parameters
         ----------
         node_a : int
@@ -240,6 +281,8 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None):
         of shortest path lengths. Must provide an impedance name if more
         than one is available.
 
+        Added in Pandana v0.5.
+
         Parameters
         ----------
         nodes_a : list-like of ints
@@ -436,7 +479,7 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None,
 
     def get_node_ids(self, x_col, y_col, mapping_distance=None):
         """
-        Assign node_ids to data specified by x_col and y_col
+        Assign node_ids to data specified by x_col and y_col.
 
         Parameters
         ----------
@@ -481,15 +524,16 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None):
 
         return df.node_id
 
-    def plot(
-            self, data, bbox=None, plot_type='scatter',
-            fig_kwargs=None, bmap_kwargs=None, plot_kwargs=None,
-            cbar_kwargs=None):
+    def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None,
+             plot_kwargs=None, cbar_kwargs=None):
         """
-        Plot an array of data on a map using matplotlib and Basemap,
-        automatically matching the data to the Pandana network node positions.
+        Plot an array of data on a map using Matplotlib, automatically matching
+        the data to the Pandana network node positions. Keyword arguments are
+        passed to the plotting routine.
 
-        Keyword arguments are passed to the plotting routine.
+        Modified in Pandana v0.6 to eliminate usage of Matplotlib's deprecated
+        Basemap toolkit. No longer accepts bmap_kwargs and no longer returns
+        a Basemap object.
 
         Parameters
         ----------
@@ -500,22 +544,17 @@ def plot(
             (lat_min, lng_min, lat_max, lng_max)
         plot_type : {'hexbin', 'scatter'}, optional
         fig_kwargs : dict, optional
-            Keyword arguments that will be passed to
-            matplotlib.pyplot.subplots. Use this to specify things like
-            figure size or background color.
-        bmap_kwargs : dict, optional
-            Keyword arguments that will be passed to the Basemap constructor.
-            This can be used to specify a projection or coastline resolution.
+            Keyword arguments that will be passed to matplotlib.pyplot.subplots.
+            Use this to specify things like figure size or background color.
         plot_kwargs : dict, optional
             Keyword arguments that will be passed to the matplotlib plotting
-            command used. Use this to control plot styles and color maps used.
+            command. Use this to control plot styles and color maps.
         cbar_kwargs : dict, optional
-            Keyword arguments passed to the Basemap.colorbar method.
+            Keyword arguments that will be passed to matplotlib.pyplot.colorbar.
             Use this to control color bar location and label.
 
         Returns
         -------
-        bmap : Basemap
         fig : matplotlib.Figure
         ax : matplotlib.Axes
 
@@ -528,14 +567,11 @@ def plot(
         try:
             import matplotlib
             import matplotlib.pyplot as plt
-            from mpl_toolkits.basemap import Basemap
         except (ModuleNotFoundError, RuntimeError):
-            raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib and "
-                                      "the Matplotlib Basemap Toolkit")
+            raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib")
 
-        fig_kwargs = fig_kwargs or {}
-        bmap_kwargs = bmap_kwargs or {}
-        plot_kwargs = plot_kwargs or {}
+        fig_kwargs = fig_kwargs or {'figsize': (10, 8)}
+        plot_kwargs = plot_kwargs or {'cmap': 'hot_r', 's': 1}
         cbar_kwargs = cbar_kwargs or {}
 
         if not bbox:
@@ -547,23 +583,20 @@ def plot(
 
         fig, ax = plt.subplots(**fig_kwargs)
 
-        bmap = Basemap(
-            bbox[1], bbox[0], bbox[3], bbox[2], ax=ax, **bmap_kwargs)
-        bmap.drawcoastlines()
-        bmap.drawmapboundary()
-
-        x, y = bmap(self.nodes_df.x.values, self.nodes_df.y.values)
+        x, y = (self.nodes_df.x.values, self.nodes_df.y.values)
 
         if plot_type == 'scatter':
-            plot = bmap.scatter(
+            plot = plt.scatter(
                 x, y, c=data.values, **plot_kwargs)
         elif plot_type == 'hexbin':
-            plot = bmap.hexbin(
+            plot = plt.hexbin(
                 x, y, C=data.values, **plot_kwargs)
 
-        bmap.colorbar(plot, **cbar_kwargs)
+        colorbar = plt.colorbar(plot, **cbar_kwargs)
+
+        plt.show()
 
-        return bmap, fig, ax
+        return fig, ax
 
     def init_pois(self, num_categories, max_dist, max_pois):
         """

diff --git a/pandana/tests/test_pandana.py b/pandana/tests/test_pandana.py
@@ -268,6 +268,23 @@ def test_shortest_path(sample_osm):
         assert ids[1] == path[-1]
 
 
+def test_shortest_paths(sample_osm):
+
+    nodes = random_connected_nodes(sample_osm, 100)
+    vec_paths = sample_osm.shortest_paths(nodes[0:50], nodes[50:100])
+
+    for i in range(50):
+        path = sample_osm.shortest_path(nodes[i], nodes[i+50])
+        assert(np.array_equal(vec_paths[i], path))
+
+    # check mismatched OD lists
+    try:
+        vec_paths = sample_osm.shortest_paths(nodes[0:51], nodes[50:100])
+        assert 0
+    except ValueError as e:
+        pass
+
+
 def test_shortest_path_length(sample_osm):
 
     for i in range(10):

diff --git a/setup.py b/setup.py
@@ -131,7 +131,7 @@ def run(self):
 ## Standard setup
 ###############################################
 
-version = '0.5.1'
+version = '0.6'
 
 packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"])
 

diff --git a/src/accessibility.cpp b/src/accessibility.cpp
@@ -96,24 +96,41 @@ Accessibility::precomputeRangeQueries(float radius) {
 }
 
 
-std::vector<int>
+vector<int>
 Accessibility::Route(int src, int tgt, int graphno) {
     vector<NodeID> ret = this->ga[graphno]->Route(src, tgt);
     return vector<int> (ret.begin(), ret.end());
 }
 
 
+vector<vector<int>>
+Accessibility::Routes(vector<long> sources, vector<long> targets, int graphno) {
+
+    int n = std::min(sources.size(), targets.size()); // in case lists don't match
+    vector<vector<int>> routes(n);
+
+    #pragma omp parallel
+    #pragma omp for schedule(guided)
+    for (int i = 0 ; i < n ; i++) {
+        vector<NodeID> ret = this->ga[graphno]->Route(sources[i], targets[i], 
+            omp_get_thread_num());
+        routes[i] = vector<int> (ret.begin(), ret.end());
+    }
+    return routes;
+}
+
+
 double
 Accessibility::Distance(int src, int tgt, int graphno) {
     return this->ga[graphno]->Distance(src, tgt);
 }
 
 
-std::vector<double>
+vector<double>
 Accessibility::Distances(vector<long> sources, vector<long> targets, int graphno) {                       
 
     int n = std::min(sources.size(), targets.size()); // in case lists don't match
-    vector<double> distances (n);
+    vector<double> distances(n);
 
     #pragma omp parallel
     #pragma omp for schedule(guided)