From 15c0000882bced2a151c0fe63b3c35ce9608a5c3 Mon Sep 17 00:00:00 2001 From: Philipp Kats Date: Sun, 7 Aug 2016 11:21:38 -0400 Subject: [PATCH] separate spatial_index function, might be of use for "millions_to_hundreds" sjoint, where we split data into chunks and don't want to generate a rtree.Index for each chunk once again --- geopandas/tools/sjoin.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/geopandas/tools/sjoin.py b/geopandas/tools/sjoin.py index 9dd16b9de9..af92507ba3 100644 --- a/geopandas/tools/sjoin.py +++ b/geopandas/tools/sjoin.py @@ -3,8 +3,29 @@ from shapely import prepared +def rtree_index(geom): + '''create a spatial index of a geometry + + Parameters + ---------- + geom : pd.Series with shapely geometry objects + + Returns + ------- + rtree.index : spatial index + ''' + import rtree + + tree_idx = rtree.index.Index() + geom_bounds = geom.apply(lambda x: x.bounds) + for i in geom_bounds.index: + tree_idx.insert(i, geom_bounds[i]) + + return tree_idx + + def sjoin(left_df, right_df, how='inner', op='intersects', - lsuffix='left', rsuffix='right'): + lsuffix='left', rsuffix='right', tree_idx=None): """Spatial join of two GeoDataFrames. Parameters @@ -24,9 +45,9 @@ def sjoin(left_df, right_df, how='inner', op='intersects', Suffix to apply to overlapping column names (left GeoDataFrame). rsuffix : string, default 'right' Suffix to apply to overlapping column names (right GeoDataFrame). + tree_idx : rtree.index for right GeoDataFrame, if None, will be created. """ - import rtree allowed_hows = ['left', 'right', 'inner'] if how not in allowed_hows: @@ -45,10 +66,9 @@ def sjoin(left_df, right_df, how='inner', op='intersects', if left_df.crs != right_df.crs: print('Warning: CRS does not match!') - tree_idx = rtree.index.Index() - right_df_bounds = right_df['geometry'].apply(lambda x: x.bounds) - for i in right_df_bounds.index: - tree_idx.insert(i, right_df_bounds[i]) + if tree_idx is None: + tree_idx = rtree_index(right_df['geometry']) + idxmatch = (left_df['geometry'].apply(lambda x: x.bounds) .apply(lambda x: list(tree_idx.intersection(x))))