Skip to content

Commit

Permalink
fix performance degradation of sjoin when using pygeos
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Mar 21, 2020
1 parent 02ea1ad commit a48c5e0
Showing 1 changed file with 23 additions and 12 deletions.
35 changes: 23 additions & 12 deletions geopandas/tools/sjoin.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,24 +146,35 @@ def sjoin(
r_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()])

if len(r_idx) > 0 and len(l_idx) > 0:
# Vectorize predicate operations
def find_intersects(a1, a2):
return a1.intersects(a2)
if compat.USE_PYGEOS:
import pygeos

predicate_d = {
"intersects": pygeos.intersects,
"contains": pygeos.contains,
"within": pygeos.contains,
}
check_predicates = predicate_d[op]
else:
# Vectorize predicate operations
def find_intersects(a1, a2):
return a1.intersects(a2)

def find_contains(a1, a2):
return a1.contains(a2)
def find_contains(a1, a2):
return a1.contains(a2)

predicate_d = {
"intersects": find_intersects,
"contains": find_contains,
"within": find_contains,
}
predicate_d = {
"intersects": find_intersects,
"contains": find_contains,
"within": find_contains,
}

check_predicates = np.vectorize(predicate_d[op])
check_predicates = np.vectorize(predicate_d[op])

if compat.USE_PYGEOS:
res = check_predicates(
left_df.geometry[l_idx], right_df[right_df.geometry.name][r_idx]
left_df.geometry[l_idx].array.data,
right_df[right_df.geometry.name][r_idx].array.data,
)
else:
res = check_predicates(
Expand Down

1 comment on commit a48c5e0

@adriangb
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jorisvandenbossche can't we use bulk_query on pygeos' STRTree (which will check the predicate itself) directly?

Please sign in to comment.