In [1]:
import concurrent.futures as cf
import itertools

import gps2var
import numpy as np
import pyproj
import rasterio

In [2]:
DATA_PATH = "wildareas-v3-2009-human-footprint.tif"

In [3]:
def gen_inputs(scale=1, size=10000, seed=42):
    """Yield an endless stream of random coordinate batches.

    Args:
        scale: Half-width of the sampling interval; values are drawn
            uniformly from [-scale, scale).
        size: Number of points per batch.
        seed: Seed for the random generator, making the stream reproducible.

    Yields:
        A float array of shape (2, size). Callers unpack it with ``*`` so the
        two rows are passed as two separate positional arguments.
    """
    generator = np.random.default_rng(seed)
    batch_shape = (size, 2)
    while True:
        points = generator.uniform(low=-scale, high=scale, size=batch_shape)
        # Transpose so each coordinate axis becomes one row of the batch.
        yield points.transpose()

## Basic synchronous reading

In [4]:
def time_rasterio_sample(scale):
    """rasterio.DatasetReader.sample()"""
    inputs = gen_inputs(scale=scale)
    with rasterio.open(DATA_PATH) as dataset:
        transformer = pyproj.Transformer.from_crs("EPSG:4326", dataset.crs, always_xy=True)
        %timeit list(dataset.sample(zip(*transformer.transform(*next(inputs)))))

In [5]:
def time_reader(scale):
    """RasterValueReader()"""
    inputs = gen_inputs(scale=scale)
    with gps2var.RasterValueReader(DATA_PATH) as reader:
        %timeit reader.get(*next(inputs))

In [6]:
def time_reader_preload(scale):
    """RasterValueReader(preload_all=True)"""
    inputs = gen_inputs(scale=scale)
    with gps2var.RasterValueReader(DATA_PATH, preload_all=True) as reader:
        %timeit reader.get(*next(inputs))

In [7]:
# Run every single-reader benchmark at each input scale. Each function's
# docstring is used as its row label, padded to 40 columns so the %timeit
# output that follows on the same line is aligned.
benchmarks = (time_rasterio_sample, time_reader, time_reader_preload)
for scale in (0.1, 1, 10, 45):
    print(f"scale={scale}")
    for fn in benchmarks:
        print(f"  {fn.__doc__:40}", end="")
        fn(scale=scale)

scale=0.1
  rasterio.DatasetReader.sample()         535 ms ± 2.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  RasterValueReader()                     8.57 ms ± 20.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
  RasterValueReader(preload_all=True)     7.87 ms ± 4.95 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
scale=1
  rasterio.DatasetReader.sample()         534 ms ± 3.25 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  RasterValueReader()                     9.85 ms ± 86.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
  RasterValueReader(preload_all=True)     8.14 ms ± 65.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
scale=10
  rasterio.DatasetReader.sample()         544 ms ± 12.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  RasterValueReader()                     68 ms ± 166 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
  RasterValueReader(preload_all=True)     8.43 ms ± 21.1 µs per loop (mean ± 

## Concurrent reading, interpolation

In [8]:
for kwargs in itertools.product([False, True], [1, 10], ["nearest", "bilinear"]):
    kwargs = dict(zip(["preload_all", "scale", "interpolation"], kwargs))
    print(", ".join("{}={!r}".format(k, v) for k, v in kwargs.items()))
    scale = kwargs.pop("scale")

    print("  sync:   ", end="")
    inputs = gen_inputs(scale=scale)
    with gps2var.RasterValueReaderPool(DATA_PATH, num_workers=8, **kwargs) as reader:    
        %timeit [reader.get(*next(inputs)) for _ in range(32)]

    print("  async:  ", end="")
    inputs = gen_inputs(scale=scale)
    with gps2var.RasterValueReaderPool(DATA_PATH, num_workers=8, **kwargs) as reader:
        %timeit list(cf.as_completed(reader.async_get(*next(inputs)) for _ in range(32)))

preload_all=False, scale=1, interpolation='nearest'
  sync:   377 ms ± 9.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  async:  66.8 ms ± 3.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
preload_all=False, scale=1, interpolation='bilinear'
  sync:   1.08 s ± 1.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  async:  208 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
preload_all=False, scale=10, interpolation='nearest'
  sync:   2.26 s ± 2.74 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  async:  391 ms ± 11.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
preload_all=False, scale=10, interpolation='bilinear'
  sync:   3.61 s ± 5.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  async:  602 ms ± 18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
preload_all=True, scale=1, interpolation='nearest'
  sync:   334 ms ± 7.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
  async:  115 ms ± 2.34 ms per loop (m