In [1]:
%load_ext memory_profiler

## Run benchmark with

From repo root/base folder

```bash
mprof run python extract_point_from_raster_buffer.py -f srg-dev/test-data/pop_density/pop_density/*.tif -g srg-dev/test-data/1000_testing_points.rds
```

## Run all the cells below to record peak memory and time

In [2]:
import pandas as pd
# import geopandas as gpd
import rioxarray as riox

from pyproj import Transformer
from shapely.geometry import mapping, Point

start = pd.Timestamp('now')
buffer_value = 2_000

## Run the analysis in a scalable way

load the raster and fill NaNs with 0

In [3]:
myraster = (
    riox.open_rasterio('test-data/pop_density/pop_density/apg18e_1_0_0_20210512.tif')
    .sel(band=1)
)

In [4]:
myraster = myraster.where(myraster != myraster.rio.nodata, drop=True)

In [5]:
"{:,}".format(myraster.data.shape[0] * myraster.data.shape[1])

'15,138,272'

Load points for data extraction and create buffers

In [6]:
transformer = Transformer.from_crs("EPSG:3577", myraster.rio.crs, always_xy=True)

In [7]:
points = (
    pd.read_csv('test-data/1000_testing_points.csv')
    .rename(columns={'X': 'x', 'Y': 'y'})
    .assign(
        lat_lon_tuple = lambda columns: columns[['x', 'y']].apply(lambda row: transformer.transform(row['x'], row['y']), axis=1),
        lat = lambda columns: columns['lat_lon_tuple'].apply(lambda el: el[0]),
        lon = lambda columns: columns['lat_lon_tuple'].apply(lambda el: el[1]),
        points = lambda columns: columns['lat_lon_tuple'].apply(Point),
        points_buffer = lambda columns: columns['points'].apply(lambda x: x.buffer(buffer_value))
    )
)

Memory of dataframe in MB

In [8]:
def extract_mean_from_buffer(raster, geom):
    data_points = pd.Series(geom.exterior.coords)
    values_from_raster = data_points.apply(lambda row: raster.sel(x=row[0], y=row[1], method="nearest").item()).values
    return values_from_raster.mean()

In [9]:
%%memit
points['extracted_mean'] = points['points_buffer'].apply(lambda x: extract_mean_from_buffer(myraster, x)) * myraster.attrs['scale_factor'] + myraster.attrs['add_offset']

peak memory: 389.10 MiB, increment: 1.02 MiB


In [10]:
print(f"unning time: {pd.Timestamp('now') - start}")

unning time: 0 days 00:00:40.566406


In [11]:
points.sample(20)

Unnamed: 0,x,y,lat_lon_tuple,lat,lon,points,points_buffer,extracted_mean
534,888770.354704,-3184429.0,"(888770.3547041441, -3184428.5805817773)",888770.354704,-3184429.0,POINT (888770.3547041441 -3184428.580581777),POLYGON ((890770.3547041441 -3184428.580581777...,0.0
757,914116.651546,-3184429.0,"(914116.6515459762, -3184428.5805817773)",914116.651546,-3184429.0,POINT (914116.6515459762 -3184428.580581777),POLYGON ((916116.6515459762 -3184428.580581777...,0.0
305,862742.094719,-3184429.0,"(862742.0947185855, -3184428.5805817773)",862742.094719,-3184429.0,POINT (862742.0947185855 -3184428.580581777),POLYGON ((864742.0947185855 -3184428.580581777...,0.0
714,909229.249016,-3184429.0,"(909229.2490159367, -3184428.5805817773)",909229.249016,-3184429.0,POINT (909229.2490159367 -3184428.580581777),POLYGON ((911229.2490159367 -3184428.580581777...,0.0
993,940940.535199,-3184429.0,"(940940.5351992156, -3184428.5805817773)",940940.535199,-3184429.0,POINT (940940.5351992156 -3184428.580581777),POLYGON ((942940.5351992156 -3184428.580581777...,0.0
984,939917.590484,-3184429.0,"(939917.590483626, -3184428.5805817773)",939917.590484,-3184429.0,POINT (939917.590483626 -3184428.580581777),"POLYGON ((941917.590483626 -3184428.580581777,...",0.0
833,922754.851367,-3184429.0,"(922754.851366511, -3184428.5805817773)",922754.851367,-3184429.0,POINT (922754.851366511 -3184428.580581777),"POLYGON ((924754.851366511 -3184428.580581777,...",0.0
227,853876.57385,-3184429.0,"(853876.573850142, -3184428.5805817773)",853876.57385,-3184429.0,POINT (853876.573850142 -3184428.580581777),"POLYGON ((855876.573850142 -3184428.580581777,...",0.0
998,941508.837819,-3184429.0,"(941508.8378189876, -3184428.5805817773)",941508.837819,-3184429.0,POINT (941508.8378189876 -3184428.580581777),POLYGON ((943508.8378189876 -3184428.580581777...,0.0
774,916048.880453,-3184429.0,"(916048.880453201, -3184428.5805817773)",916048.880453,-3184429.0,POINT (916048.880453201 -3184428.580581777),"POLYGON ((918048.880453201 -3184428.580581777,...",0.0
