<IMG SRC="https://avatars2.githubusercontent.com/u/31697400?s=400&u=a5a6fc31ec93c07853dd53835936fd90c44f7483&v=4" WIDTH=125 ALIGN="right">


# Gridding vector data 

Vector data can be points, lines or polygons, often saved as shapefiles and visualised using GIS software. A common operation is to project vector data on a model grid, for example to add a surface water line to a grid. In this section we present some functions in `nlmod` to project vector data on a model grid and to aggregate vector data to model cells.

In [None]:
import nlmod
from nlmod import resample
import numpy as np
import xarray as xr
import flopy
import warnings


from matplotlib.colors import Normalize
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt

import geopandas as gpd
from shapely.geometry import LineString, Point
from shapely.geometry import Polygon as shp_polygon
from scipy.interpolate import RectBivariateSpline

from IPython.display import display

## Grid types

We create the same two grids as in the 'Resampling raster data' notebook.

In [None]:
# structured grid, built from an extent with delr=100
model_extent = [950, 1250, 20050, 20350]
ds = nlmod.get_ds(model_extent, delr=100)

# vertex grid, obtained by refining the structured grid with one point feature
refinement_features = [([Point(1200, 20200)], "point", 1)]
dsv = nlmod.grid.refine(
    ds,
    refinement_features=refinement_features,
    model_ws="model7",
)

## Vector to grid

Vector data can be points, lines or polygons, often saved as shapefiles and visualised using GIS software. A common operation is to project vector data on a model grid, for example to add a surface water line to a grid. Here are some functions in `nlmod` to project vector data on a model grid.

In [None]:
# four points, each carrying one value
point_coords = [(1000, 20200), (1200, 20175), (1225, 20175), (1300, 20425)]
point_geom = [Point(x, y) for x, y in point_coords]
point_gdf = gpd.GeoDataFrame({"values": [1, 52, 66, 24]}, geometry=point_geom)

# three lines, each connecting two of the points above (given by their index)
line_end_points = [(0, 1), (2, 3), (0, 3)]
line_geom = [
    LineString([point_geom[start], point_geom[end]]) for start, end in line_end_points
]
line_gdf = gpd.GeoDataFrame({"values": [1, 52, 66]}, geometry=line_geom)

# two polygons: one through all four points, one through the first three points
# closed via an extra corner at (1200, 20300)
pol_corners = [
    point_geom,
    point_geom[:3] + [Point(1200, 20300)],
]
pol_geom = [shp_polygon([[p.x, p.y] for p in corners]) for corners in pol_corners]
pol_gdf = gpd.GeoDataFrame({"values": [166, 5]}, geometry=pol_geom)

In [None]:
# Overview: the structured model grid with the point, line and polygon features on top.
fig, ax = plt.subplots()
# use the same modelgrid helper (nlmod.grid) as the other plotting cells in this notebook
nlmod.grid.modelgrid_from_ds(ds).plot(ax=ax)
point_gdf.plot(ax=ax, color="green")
line_gdf.plot(ax=ax, color="purple")
pol_gdf.plot(ax=ax, alpha=0.6)

# some features extend beyond the grid (e.g. the point at x=1300, y=20425), so widen
# the upper axis limits; the trailing ";" suppresses the echoed limits
ax.set_xlim(ax.get_xlim()[0], 1400)
ax.set_ylim(ax.get_ylim()[0], 20500);

### Points

#### Aggregation methods

In [None]:
fig, axes = plt.subplots(ncols=4, figsize=(20, 5))

# one gridded DataArray per aggregation method
da1, da2, da3 = (
    nlmod.grid.gdf_to_da(point_gdf, ds, "values", agg_method=method)
    for method in ("min", "max", "mean")
)
gridded = (da1, da2, da3)

# shared colour limits so the panels can be compared directly
vmin = min(da.min() for da in gridded)
vmax = max(da.max() for da in gridded)

panel_titles = ("aggregation min", "aggregation max", "aggregation mean")
for panel, da, panel_title in zip(axes, gridded, panel_titles):
    da.plot(ax=panel, vmin=vmin, vmax=vmax)
    panel.set_title(panel_title)
    panel.axis("scaled")

# last panel: the original points drawn on the model grid
points_ax = axes[3]
point_gdf.plot("values", ax=points_ax, vmin=vmin, vmax=vmax, legend=True)
nlmod.grid.modelgrid_from_ds(ds).plot(ax=points_ax)
points_ax.set_title("points")
points_ax.axis("scaled");

#### Interpolation methods

In [None]:
fig, axes = plt.subplots(ncols=3, figsize=(15, 5))

# gdf_to_data_array_struc is called with a groundwater flow model rather than the
# dataset, so build a simulation, a gwf model and a dis package from ds first
ds.attrs["model_ws"] = ""
sim = nlmod.sim.sim(ds)
gwf = nlmod.gwf.gwf(ds, sim)
dis = nlmod.gwf.dis(ds, gwf)

# grid the point values with both interpolation methods
da1 = nlmod.grid.gdf_to_data_array_struc(
    point_gdf, gwf, field="values", interp_method="nearest"
)
da2 = nlmod.grid.gdf_to_data_array_struc(
    point_gdf, gwf, field="values", interp_method="linear"
)

# shared colour limits so the panels can be compared directly
vmin = min(da1.min(), da2.min())
vmax = max(da1.max(), da2.max())

panel_titles = ("interpolation nearest", "interpolation linear")
for panel, da, panel_title in zip(axes, (da1, da2), panel_titles):
    da.plot(ax=panel, vmin=vmin, vmax=vmax)
    panel.set_title(panel_title)
    panel.axis("scaled")

# last panel: the original points drawn on the model grid
point_gdf.plot("values", ax=axes[2], vmin=vmin, vmax=vmax, legend=True)
nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[2])
axes[2].set_title("points")
# trailing ";" keeps the axis limits from being echoed, as in the other plot cells
axes[2].axis("scaled");

### Lines

In [None]:
fig, axes = plt.subplots(ncols=4, figsize=(20, 5))

# one gridded DataArray per aggregation method
da1 = nlmod.grid.gdf_to_da(line_gdf, ds, "values", agg_method="max_length")
da2 = nlmod.grid.gdf_to_da(line_gdf, ds, "values", agg_method="length_weighted")
da3 = nlmod.grid.gdf_to_da(line_gdf, ds, "values", agg_method="mean")

# shared colour limits so the panels can be compared directly
vmin = min(da1.min(), da2.min(), da3.min())
vmax = max(da1.max(), da2.max(), da3.max())

panel_titles = (
    "aggregation max_length",
    "aggregation length_weighted",
    "aggregation mean",
)
for panel, da, panel_title in zip(axes, (da1, da2, da3), panel_titles):
    da.plot(ax=panel, vmin=vmin, vmax=vmax)
    panel.set_title(panel_title)
    panel.axis("scaled")

# last panel: the original lines drawn on the model grid
line_gdf.plot("values", ax=axes[3], vmin=vmin, vmax=vmax, legend=True)
nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[3])
axes[3].set_title("lines")
# trailing ";" keeps the axis limits from being echoed, as in the other plot cells
axes[3].axis("scaled");

### Polygons

In [None]:
fig, axes = plt.subplots(ncols=4, figsize=(20, 5))

# area based aggregation straight from the model dataset
da1 = nlmod.grid.gdf_to_da(pol_gdf, ds, "values", agg_method="max_area")
da2 = nlmod.grid.gdf_to_da(pol_gdf, ds, "values", agg_method="area_weighted")
# "nearest" is done with the structured-grid function, which is given the gwf model
da3 = nlmod.grid.gdf_to_data_array_struc(pol_gdf, gwf, "values", agg_method="nearest")
gridded = (da1, da2, da3)

# shared colour limits so the panels can be compared directly
vmin = min(da.min() for da in gridded)
vmax = max(da.max() for da in gridded)

panel_titles = (
    "aggregation max_area",
    "aggregation area_weighted",
    "aggregation nearest",
)
for panel, da, panel_title in zip(axes, gridded, panel_titles):
    da.plot(ax=panel, vmin=vmin, vmax=vmax)
    panel.set_title(panel_title)
    panel.axis("scaled")

# last panel: the original polygons drawn on the model grid
polygons_ax = axes[3]
pol_gdf.plot("values", ax=polygons_ax, vmin=vmin, vmax=vmax, legend=True)
nlmod.grid.modelgrid_from_ds(ds).plot(ax=polygons_ax)
polygons_ax.set_title("polygons")
polygons_ax.axis("scaled");

### Intersect vector data with grid

In [None]:
# run each feature set (points, lines, polygons) through gdf_to_grid with the
# structured model dataset
gdf_point_grid, gdf_line_grid, gdf_pol_grid = (
    nlmod.grid.gdf_to_grid(features, ds)
    for features in (point_gdf, line_gdf, pol_gdf)
)

In [None]:
fig, ax = plt.subplots()

gdf_point_grid.plot(ax=ax, color="green")

# Colour every intersected line segment / polygon part by its row number. `assign`
# returns a copy, so the plot-only "ind" column is not written into gdf_line_grid and
# gdf_pol_grid, which are displayed and aggregated further down in this notebook.
gdf_line_grid.assign(ind=range(len(gdf_line_grid))).plot("ind", ax=ax, cmap="jet")
gdf_pol_grid.assign(ind=range(len(gdf_pol_grid))).plot("ind", ax=ax, alpha=0.6)

nlmod.grid.modelgrid_from_ds(ds).plot(ax=ax)
ax.set_xlim(ax.get_xlim()[0], 1300)
ax.set_ylim(ax.get_ylim()[0], 20400);

### Aggregate parameters per model cell

Aggregation options:
- point: max, min, mean
- line: max, min, length_weighted, max_length
- polygon: max, min, area_weighted, max_area


In [None]:
# point: show the intersected points, then aggregate "values" per cell with "max"
point_fields_methods = {"values": "max"}
display(gdf_point_grid)
nlmod.grid.aggregate_vector_per_cell(gdf_point_grid, point_fields_methods)

In [None]:
# line: show the intersected lines, then aggregate "values" per cell with
# "length_weighted"
line_fields_methods = {"values": "length_weighted"}
display(gdf_line_grid)
nlmod.grid.aggregate_vector_per_cell(gdf_line_grid, line_fields_methods)

In [None]:
# polygon: show the intersected polygons, then aggregate "values" per cell with
# "area_weighted"
pol_fields_methods = {"values": "area_weighted"}
display(gdf_pol_grid)
nlmod.grid.aggregate_vector_per_cell(gdf_pol_grid, pol_fields_methods)

## Grid to reclist
For some modflow packages (drn, riv, ghb, wel) you need to specify stress_period_data to create them using flopy. This stress_period_data consists of reclists (also called lrcd for a structured grid) for every time step. 

The function `da_to_reclist` can be used to convert grid data (both structured and vertex) to a reclist. This function has many arguments:
- `mask`, boolean DataArray to determine which cells should be added to the reclist. Can be 2d or 3d.
- `layer`, if `mask` is a 2d array the value of `layer` is used in the reclist. If `mask` is 3d or `first_active_layer` is True the `layer` argument is ignored.
- `only_active_cells`, if True only add cells with an idomain of 1 to the reclist
- `first_active_layer`, if True use the first active layer, obtained from the idomain, as the layer for each cell.
- `col1`,`col2` and `col3`, The column data of the reclist.

The examples below show the result of each argument.

In [None]:
# add layer dimension
if "layer" not in ds.dims:
    ds = ds.expand_dims({"layer": range(3)})

# dimension lengths; Dataset.sizes is the mapping from dimension name to length
# (indexing Dataset.dims like a mapping is deprecated in recent xarray versions)
nlay, nrow, ncol = (ds.sizes[dim] for dim in ("layer", "y", "x"))

# seeded generator, so the example arrays and the reclists derived from them are
# identical on every run of the notebook
rng = np.random.default_rng(42)

# create some data arrays filled with random integers from 0 up to and including 9
ds["da1"] = ("layer", "y", "x"), rng.integers(0, 10, (nlay, nrow, ncol))
ds["da2"] = ("y", "x"), rng.integers(0, 10, (nrow, ncol))
ds["da3"] = ("y", "x"), rng.integers(0, 10, (nrow, ncol))

# add a nodata value
ds.attrs["nodata"] = -999

# create an idomain of ones except for the first cell which is zero
idomain = np.ones((nlay, nrow, ncol))
idomain[0, 0, 0] = 0
ds["idomain"] = ("layer", "y", "x"), idomain

### Mask and layer
If `mask` is a 2d array, the `layer` argument specifies the layer that is used in the reclist.

In [None]:
# structured 2d grid to reclist
# the mask selects every cell whose da2 value equals the value of cell [0, 0]
first_cell_value = ds["da2"][0, 0]
mask2d = ds["da2"] == first_cell_value
reclist1 = nlmod.grid.da_to_reclist(
    ds,
    mask2d,
    col1=ds["da1"][0],
    col2="da2",
    layer=0,
    only_active_cells=False,
)
reclist1

If the `mask` is three dimensional the `layer` argument is ignored.

In [None]:
# create a 3d mask: every cell whose da1 value equals the value of cell [0, 0, 0]
reference_value = ds["da1"].values[0, 0, 0]
mask3d = ds["da1"] == reference_value

# use this mask to create the reclist
reclist2 = nlmod.grid.da_to_reclist(
    ds,
    mask3d,
    col1="da1",
    col2=100,
    layer=0,
    only_active_cells=False,
)
reclist2

### Only active cells
With `only_active_cells=True` we make sure only active cells end up in the reclist. Which cells are active is based on the `idomain` in the model dataset.

In [None]:
# Same 3d mask as before, but now only the cells with an active idomain are returned
reclist3 = nlmod.grid.da_to_reclist(
    ds,
    mask3d,
    col1="da1",
    col2=100,
    only_active_cells=True,
)
reclist3

In [None]:
# also possible for a 2d grid
# the mask selects every cell whose da2 value equals the value of cell [0, 0]
first_cell_value = ds["da2"][0, 0]
mask2d = ds["da2"] == first_cell_value
reclist1 = nlmod.grid.da_to_reclist(
    ds,
    mask2d,
    col1=ds["da1"][0],
    col2="da2",
    layer=0,
    only_active_cells=True,
)
reclist1

### First active layer
Use `first_active_layer=True` to add the first active layer to the reclist. The first active layer is obtained from the idomain.

In [None]:
# create a reclist from a 2d mask with col1 (str) and col2 (str); no layer is given,
# first_active_layer=True is passed instead
reclist4 = nlmod.grid.da_to_reclist(
    ds, mask2d, col1="da2", col2="da3", first_active_layer=True
)
reclist4

### Reclist columns
The `col1`, `col2` and `col3` arguments specify what data should be listed in the reclist. The types can be `str`, `xarray.DataArray`, `None` or other. If the value is a `str`, the corresponding DataArray from the Dataset is used to get data for the reclist. If the value is an `xarray.DataArray`, the DataArray is used. If the value is `None`, the column is not added to the reclist, and if the value is of another type, the value is used for every record in the reclist.

Be aware that if `mask` is a 3d array, the DataArrays of the column should also be 3d.

In [None]:
# create a reclist with col1 (DataArray), col2 (str), col3 (int)
reclist5 = nlmod.grid.da_to_reclist(
    ds, mask3d, col1=ds["idomain"], col2="da1", col3=9, layer=0, only_active_cells=False
)
reclist5

### Vertex model to reclist

In [None]:
# add a random DataArray to the vertex dataset; the generator is seeded so the
# resulting reclist is identical on every run of the notebook
rng = np.random.default_rng(42)
da_vert = rng.integers(0, 10, dsv["area"].shape)
# dims is written as an explicit 1-tuple; ("icell2d") without the comma is a plain string
dsv["da_vert"] = ("icell2d",), da_vert

# create rec list from a vertex dataset
# the mask selects every cell whose da_vert value equals the value of the first cell
mask_vert = dsv["da_vert"] == dsv["da_vert"][0]
reclist6 = nlmod.grid.da_to_reclist(
    dsv, mask_vert, col1="da_vert", col2=2330, only_active_cells=False
)
reclist6