In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Some data

In [None]:
observations = 100_000

# Seed NumPy's global RNG so the sampled points (and therefore every result
# compared further down) are identical on each Restart & Run All.
np.random.seed(42)

# Uniform random points in the unit square.
x, y = np.random.rand(observations), np.random.rand(observations)
# Ten consecutive rows share one id. Integer division gives the same values as
# repeating arange(observations // 10) ten times, but also works when
# `observations` is not a multiple of 10.
ids = np.arange(observations) // 10
df = pd.DataFrame({'x': x, 'y':y, 'id':ids})

Check whether the polygon (red) contains these points.

In [None]:
# Alias the matplotlib patch class: later cells import shapely's `Polygon`
# under the same bare name, and re-running this cell would otherwise rebind
# `Polygon` to the matplotlib class underneath them.
from matplotlib.patches import Polygon as PolygonPatch

fig, ax = plt.subplots()
# Plot a fixed-seed subsample so the figure is quick to draw and reproducible.
df.sample(1000, random_state=0).plot.scatter(x='x', y='y', ax=ax)

# Query region: the square [0, 0.5] x [0, 0.5]. The first vertex is not repeated.
polygon_coords = np.array([[0.0, 0.0], [0.0, 0.5], [0.5, 0.5], [0.5, 0.0]])

# closed=True so the drawn outline includes the edge back to the first vertex,
# matching the closed region that is tested in the cells below.
p = PolygonPatch(polygon_coords, closed=True, fc='red', alpha=.5, ec='red')
ax.add_patch(p);

In [None]:
# Load the memory_profiler IPython extension; the %%memit cells in this notebook need it.
%load_ext memory_profiler

## GeoPandas

In [None]:
import geopandas
from shapely.geometry import Polygon

In [None]:
%%timeit

# Timed work: wrap df in a GeoDataFrame with one point geometry per (x, y) row,
# build the shapely polygon for the zone, then test every row against it.
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y))
zone = Polygon(polygon_coords)
gdf.within(zone)

In [None]:
%%memit

# Memory-profiled work: the same three GeoPandas steps as the timing cell
# (build the GeoDataFrame, build the zone polygon, run `within`).
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y))
zone = Polygon(polygon_coords)
gdf.within(zone)

## Rustgeos

In [None]:
import rustgeos

# Keep the rustgeos answer around: it is the reference that the other
# implementations are compared against later in the notebook.
result_rust = rustgeos.contains(
    polygon_coords,
    df["x"].to_numpy(),
    df["y"].to_numpy(),
)

In [None]:
%%timeit
# Timed work: one rustgeos.contains call on the polygon vertices and the x / y value arrays.
rustgeos.contains(polygon_coords, df.x.values, df.y.values)

In [None]:
%%memit
# Memory-profiled work: the same single rustgeos.contains call as the timing cell.
rustgeos.contains(polygon_coords, df.x.values, df.y.values)

## Shapely vectorized

In [None]:
from shapely.geometry import Polygon, LineString
from shapely.prepared import prep
from shapely.vectorized import contains

In [None]:
%%timeit
# Timed work: build the shapely polygon, wrap it with prep(), and run the
# `contains` imported from shapely.vectorized over the x / y value arrays.
zone = Polygon(polygon_coords)
contains(prep(zone), df.x.values, df.y.values)

In [None]:
# Build the zone in this cell instead of relying on a `zone` left behind by a
# benchmark cell: names assigned inside %%timeit are not kept in the notebook
# namespace, so this cell must not depend on them.
zone = Polygon(polygon_coords)
result_shapely = contains(prep(zone), df.x.values, df.y.values)

In [None]:
%%memit
# Memory-profiled work: the same polygon construction, prep() and vectorized
# `contains` call as the timing cell.
zone = Polygon(polygon_coords)
contains(prep(zone), df.x.values, df.y.values)

## PyGeos

In [None]:
from pygeos.predicates import contains
from pygeos.creation import box, points
from pygeos.creation import Geometry

In [None]:
%%timeit

# One pygeos point geometry per (x, y) row.
geoms = points(df.x.values, df.y.values)

# Build the polygon from a WKT string: "x y" per vertex, with the first vertex
# repeated at the end to close the ring.
xy_with_space = [' '.join([str(x) for x in a]) for a in polygon_coords]
xy_closed = xy_with_space + xy_with_space[:1]
# NOTE(review): the pieces come from str() of individual coordinates, so they
# should not contain '[' or ']'; the two replace() calls look like no-ops.
# Confirm before removing them.
coords_string = ', '.join(xy_closed).replace('[', '(').replace(']', ')')
poly = Geometry(f"POLYGON(({coords_string}))")

contains(poly, geoms)

In [None]:
# Build the inputs here: the only earlier assignments of `geoms` and `poly`
# are inside a %%timeit cell, which does not keep the names it assigns, so on
# a fresh Restart & Run All they would be undefined at this point.
geoms = points(df.x.values, df.y.values)

# WKT rings must be closed, so repeat the first vertex at the end.
closed_ring = [*polygon_coords, polygon_coords[0]]
coords_string = ', '.join(' '.join(map(str, vertex)) for vertex in closed_ring)
poly = Geometry(f"POLYGON(({coords_string}))")

result_pygeos = contains(poly, geoms)

In [None]:
%%memit

# One pygeos point geometry per (x, y) row.
geoms = points(df.x.values, df.y.values)

# Build the polygon from a WKT string: "x y" per vertex, with the first vertex
# repeated at the end to close the ring.
xy_with_space = [' '.join([str(x) for x in a]) for a in polygon_coords]
xy_closed = xy_with_space + xy_with_space[:1]
# NOTE(review): the pieces come from str() of individual coordinates, so they
# should not contain '[' or ']'; the two replace() calls look like no-ops.
# Confirm before removing them.
coords_string = ', '.join(xy_closed).replace('[', '(').replace(']', ')')
poly = Geometry(f"POLYGON(({coords_string}))")

contains(poly, geoms)

## Just to be sure: the implementations agree

In [None]:
# The rustgeos result is the reference; the shapely and pygeos results must
# match it exactly, otherwise the benchmark compares different computations.
for other_result in (result_shapely, result_pygeos):
    np.testing.assert_equal(result_rust, other_result)

## Performance comparison

In [None]:
def annotate_bars(ax):
    """Label every bar on ``ax`` with its height, truncated to an integer.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes whose ``patches`` are the bars to label.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    for bar in ax.patches:
        # Read geometry through the public accessors rather than the private
        # ``_x0`` / ``_height`` attributes, which are implementation details
        # of matplotlib's Rectangle.
        height = bar.get_height()
        # Anchor the label at the bar's left edge, 1% above its top.
        ax.text(bar.get_x(), height*1.01,
                str(int(height)), fontsize=10)
    return ax

def beautify_ax(ax):
    """Tidy up an axes: ticks on the bottom/left only, no top/right frame.

    The left spine is moved to the data coordinate -0.5. Returns the same
    ``ax`` that was passed in.
    """
    # Keep tick marks only on the bottom (x) and left (y) sides.
    for axis, side in ((ax.xaxis, 'bottom'), (ax.yaxis, 'left')):
        axis.set_ticks_position(side)

    # Pin the left spine at x = -0.5 in data coordinates.
    ax.spines['left'].set_position(('data', -.5))

    # Hide the frame lines on the top and right.
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    return ax

# The figures below are literal values, not measured by this run of the notebook.
# NOTE(review): the column labels refer to 1e6 / 1e7 coordinates while
# `observations` is 100_000, so these were presumably recorded from earlier
# runs with larger inputs. Confirm the provenance.
libraries = ['GeoPandas', 'PyGEOS', 'Shapely Vectorized', 'rustgeos']

cpu_stats = pd.DataFrame(
    {'1e6 coordinates': [446, 260, 109, 37],
     '1e7 coordinates': [4830, 2460, 1110, 382]},
    index=libraries,
)

mem_stats = pd.DataFrame(
    {'1e6 coordinates': [510, 340, 164, 162],
     '1e7 coordinates': [3882, 2412, 654, 660]},
    index=libraries,
)

fig, axes = plt.subplots(1, 2, figsize=(15,5))

# One (table, title, y-label) triple per panel, left to right.
panels = (
    (cpu_stats, "Speed", 'execution time (ms)'),
    (mem_stats, "Memory usage", 'peak memory (MiB)'),
)
for panel_ax, (stats, title, ylabel) in zip(axes, panels):
    stats.plot.bar(ax=panel_ax)
    panel_ax.set_title(title)
    panel_ax.set_ylabel(ylabel)
    annotate_bars(panel_ax)
    beautify_ax(panel_ax)

fig.suptitle("'Contain' operation on polygon, with x/y coordinates (lower is better)");