Skip to content

Commit

Permalink
Merge pull request #8 from gidden/w_df_to_r
Browse files Browse the repository at this point in the history
hackathon improvements
  • Loading branch information
gidden committed Apr 5, 2023
2 parents 40ad146 + c2c8f8c commit 32a7a93
Show file tree
Hide file tree
Showing 10 changed files with 378 additions and 105 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,20 @@
Changelog
=========

v0.1 (2023-03-15)
v0.3 (current)
------------------------------------------------------------

* apply panel data to multi-layered rasters
* calculate grid-cell areas by latitude


v0.2 (2023-03-18)
------------------------------------------------------------

* apply data to single-layer rasters
* retrieve data from maps (i.e., zonal statistics)

v0.1 (2023-03-16)
------------------------------------------------------------

* Initial release
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
from importlib.metadata import version


# -- Project information --------------------------------------------------------------

source_suffix = ".rst"
Expand Down
141 changes: 117 additions & 24 deletions docs/notebooks/dataframe_to_raster.ipynb

Large diffs are not rendered by default.

76 changes: 37 additions & 39 deletions docs/notebooks/introduction.ipynb

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ dependencies = [
'rasterio',
'scipy',
'shapely',
'netCDF4'
'netCDF4',
'geopandas',
'rioxarray',
]

dynamic = ["version"]
Expand Down
10 changes: 9 additions & 1 deletion src/ptolemy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from importlib.metadata import version as _version

from . import raster
from .raster import Rasterize, df_to_raster, raster_to_df
from .raster import (
Rasterize,
cell_area,
cell_area_from_file,
df_to_raster,
df_to_weighted_raster,
raster_to_df,
)


try:
__version__ = _version("ptolemy")
Expand Down
174 changes: 151 additions & 23 deletions src/ptolemy/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,99 @@
import warnings

import fiona as fio
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from affine import Affine
from numpy.lib.stride_tricks import as_strided
from rasterio.features import rasterize as _rasterize
from shapely.geometry import shape
from shapely.geometry import Polygon, shape
from shapely.ops import unary_union


logger = logging.getLogger(__name__)


def cell_area_from_file(file, lat_name="lat", lon_name=None):
"""
Returns the grid cell area by latitude of a raster file.
Parameters
----------
file : str, pathlib.Path, xr.Dataset, or similar
a file from which to take transform and latitude objects
lat_name : str, optional
the name of the latitude dimension or coordinate
lon_name : str, optional
the name of the longitude dimension or coordinate
Returns
-------
area : a geopandas.Series with index of lats and values of area in m^2
"""
if isinstance(file, (xr.DataArray, xr.Dataset)):
ds = file
else:
ds = xr.open_dataset(file)
lats = ds[lat_name]
lons = ds[lon_name] if lon_name else None
crs = ds.rio.crs if ds.rio.crs else 4326
return cell_area(lats, lons, crs)


def cell_area(lats, lons=None, crs=4326):
"""
Computes the grid cell area given centroid latitude and longitude
coordinates.
Parameters
----------
lats : array or similar
latitude coordinates
lons : array or similar
longitude coordinates
crs : string or similar defining CRS, optional
the origin CRS
Returns
-------
area : a geopandas.Series with index of lats and values of area in m^2
"""
lat_offset = (lats[1] - lats[0]) / 2
lon_offset = (lons[1] - lons[0]) / 2 if lons is not None else lat_offset

lat_pairs = np.nditer([lats - lat_offset, lats + lat_offset])

return (
gpd.GeoDataFrame(
dict(
geometry=[
Polygon(
[
(-lon_offset, lat[0]),
(-lon_offset, lat[1]),
(lon_offset, lat[1]),
(lon_offset, lat[0]),
]
)
for lat in lat_pairs
],
lat=lats,
),
crs=crs,
)
.set_index("lat")
.to_crs("Sphere_Cylindrical_Equal_Area")
.area
)


def rescale_raster_props(affine, shape, scale):
"""Return new transform and shape for a raster for it to be scaled in each
lat/long dimension by a factor"""
"""
Return new transform and shape for a raster for it to be scaled in each
lat/long dimension by a factor.
"""
pixel_width = affine[0] / scale
pixel_height = affine[4] / scale
topleftlon = affine[2]
Expand All @@ -31,7 +109,9 @@ def rescale_raster_props(affine, shape, scale):


def block_view_2d(a, blockshape):
"""Collapse a 2d array into constituent blocks with a given shape."""
"""
Collapse a 2d array into constituent blocks with a given shape.
"""
shape = (
int(a.shape[0] / blockshape[0]),
int(a.shape[1] / blockshape[1]),
Expand All @@ -41,7 +121,8 @@ def block_view_2d(a, blockshape):


def block_apply_2d(a, blockshape, func=np.sum, weights=None):
"""Apply a function to blocks of an array.
"""
Apply a function to blocks of an array.
Returns a reduced array with shape:
a.shape[0] / blockshape[0], a.shape[1] / blockshape[1]
Expand Down Expand Up @@ -78,8 +159,9 @@ def block_apply_2d(a, blockshape, func=np.sum, weights=None):
def rasterize_majority(
geoms_idxs, atrans, shape, nodata, ignore_nodata=False, verbose=False
):
"""rasterize shapes such that the shape with the majority of area in a
cell is assigned to that cell
"""
Rasterize shapes such that the shape with the majority of area in a cell is
assigned to that cell.
Parameters
----------
Expand Down Expand Up @@ -118,7 +200,7 @@ def rasterize_majority(
if verbose:
logger.info("Beginning fast modal calculation")
ret = block_apply_2d(a, (scale, scale), func=fast_mode, weights=weights)
except:
except: # noqa: 722
warnings.warn("Could not apply fast mode function, using scipy's mode")
ret = block_apply_2d(a, (scale, scale), func=mode)

Expand Down Expand Up @@ -157,7 +239,7 @@ def transform_from_latlon(lat, lon):
return trans * scale


class Rasterize(object):
class Rasterize:
"""Example use case
```
Expand Down Expand Up @@ -209,7 +291,8 @@ def read_shpf(self, shpf, idxkey=None, flatten=None):
def rasterize(
self, strategy=None, normalize_weights=True, verbose=False, drop=True
):
"""Rasterizes the indicies of the current shapefile.
"""
Rasterizes the indicies of the current shapefile.
Parameters
----------
Expand Down Expand Up @@ -242,7 +325,7 @@ def rasterize(
dims = ["lat", "lon"]

if verbose:
logger.info("Beginning rasterization with the {} strategy".format(strategy))
logger.info(f"Beginning rasterization with the {strategy} strategy")

if strategy in ["all_touched", "centroid"]:
at = strategy == "all_touched"
Expand Down Expand Up @@ -304,8 +387,8 @@ def rasterize(
logger.info("Done with mask 3")
elif strategy == "weighted":
nodata = 0
dims += ["geometry"]
coords["geometry"] = [i for geom, i in geoms_idxs]
dims += [self.idxkey]
coords[self.idxkey] = [self.tags[i][1] for geom, i in geoms_idxs]
mask = xr.DataArray(
np.dstack(
tuple(
Expand All @@ -319,9 +402,9 @@ def rasterize(
zsum = xr.DataArray(np.sum(mask, axis=2))
mask /= zsum
else:
raise ValueError("Unknown strategy: {}".format(strategy))
raise ValueError(f"Unknown strategy: {strategy}")

name = self.idxkey or "indicies"
name = "geometry_index"
da = xr.DataArray(mask, name=name, coords=coords, dims=dims)
if drop and not "weighted":
da = da.where(da != nodata, drop=True)
Expand All @@ -342,7 +425,6 @@ def full_like(other, fill_value=np.nan, add_coords={}, replace_vars=[]):

if replace_vars:
data = data.drop(data.data_vars.keys())
dims = data.dims.keys()
shape = tuple(data.dims.values())
empty = np.zeros(shape=shape)
empty[empty == 0] = fill_value
Expand All @@ -353,7 +435,8 @@ def full_like(other, fill_value=np.nan, add_coords={}, replace_vars=[]):


def update_raster(raster, series, idxraster, idx_map):
"""Updates a raster array given a raster of indicies and values as columns.
"""
Updates a raster array given a raster of indicies and values as columns.
Parameters
----------
Expand All @@ -376,15 +459,16 @@ def update_raster(raster, series, idxraster, idx_map):
# python
replace = idxraster == mapidx
if not np.any(replace):
warnings.warn("No values found in raster for {}: {}".format(mapidx, validx))
warnings.warn(f"No values found in raster for {mapidx}: {validx}")
val = series.loc[validx]
if isinstance(val, pd.Series):
raise ValueError("Multiple entries found for {}".format(validx))
raise ValueError(f"Multiple entries found for {validx}")
raster[replace] = val


def df_to_raster(df, idxraster, idx_col, idx_map, ds=None, coords=[], cols=[]):
"""Takes data from a pd.DataFrame and deposits it on a raster
"""
Takes data from a pd.DataFrame and deposits it on a raster.
Parameters
----------
Expand Down Expand Up @@ -424,9 +508,53 @@ def df_to_raster(df, idxraster, idx_col, idx_map, ds=None, coords=[], cols=[]):
return ds


def df_to_weighted_raster(df, idxraster, col=None, extra_coords=[], sum_dim=None):
"""
Translates data to a raster with multiple weighting layers. This can be
used to apply panel data for a series of geometries (e.g., countries) onto
gridded data.
Parameters
----------
df : pd.DataFrame
a dataframe with columns or indicies aligned with coordinates in
`indexraster`
idxraster : xr.DataArray
an index raster with a layer coordinate aligned with `df`. This raster
can be made with `pt.Rasterize().rasterize()` using the
`strategy="weighted"` option.
col : str, optional
the column in `df` to cast to the map
extra_coords : list, optional
additional columns in `df` which should be translated to be coordinates. For
example, if you want to put panel data onto a raster and that data has a
"year" column, then you should call this with `coords=["year"]`
sum_dim : list, optional
string names of dimension(s) to sum along. This option can be used,
e.g., to collapse the multiple weighted layers into one 'global' result.
"""
if len(set(df.index.names) - set(idxraster.dims) - set([None])) == 0:
# no multi index set, need to align with raster
idx = list(set(df.columns) & set(idxraster.dims)) + extra_coords
df = df.set_index(idx)
if col is not None:
df = df[col].to_frame()
data = xr.Dataset.from_dataframe(df)
if len(data.data_vars) > 1:
raise ValueError(
"Currently only support rasterizing one data variable with `df_to_weighted_raster`"
)
data = data[list(data.data_vars)][0] # take only data variable
result = data * idxraster
if sum_dim is not None:
result = result.sum(dim=sum_dim)
return result


def raster_to_df(raster, idxraster, idxmap, func=None, nodata=-1):
"""Takes data from a raster and makes a pd.DataFrame. Zonal statistics can
be derived with this function.
"""
Takes data from a raster and makes a pd.DataFrame. Zonal statistics can be
derived with this function.
By default, unique values in the index raster areas are returned.
Expand Down Expand Up @@ -464,6 +592,6 @@ def default_func(ary):
ary = raster.values[idxraster.values == idx]
try:
data[idxmap[str(idx)]] = func(ary)
except:
except: # noqa: 722
continue
return pd.Series(data)
Binary file modified tests/test_data/weighted.nc
Binary file not shown.

0 comments on commit 32a7a93

Please sign in to comment.