In [None]:
from geostat import GP, Mesh, NormalizingFeaturizer
import matplotlib.pyplot as pp
import numpy as np
from shapely.geometry import Point, Polygon
import shapely.vectorized as shv
import geopandas as gpd
import contextily as ctx

# Overview

In this notebook we will show how `Mesh` is used to make prediction locations. We'll generate synthetic data on a map of watersheds in Berkeley, California; fit a `GP` to the data; and make predictions using `Mesh`.

# Synthesizing data

We will synthesize data at 200 random locations drawn from inside a polygon for Berkeley watersheds.

In [None]:
# Load the watershed outline and take the first geometry in the file.
berkeleydf = gpd.read_file("./berkeley-watershed.zip")
berkeley = berkeleydf['geometry'].iloc[0]

# Seeded generator so that restarting the kernel and re-running the notebook
# draws the same candidate locations every time. (This only fixes the
# locations; it does not seed whatever randomness `GP` uses internally.)
rng = np.random.default_rng(0)

# Rejection sampling: draw 2000 candidates uniformly over the bounding box,
# then keep up to 200 of those that fall inside the shape.
x0, y0, x1, y1 = berkeley.bounds
candidates = rng.uniform(size=(2000, 2)) * [x1-x0, y1-y0] + [x0, y0]
mask = shv.contains(berkeley, candidates[:, 0], candidates[:, 1]) # Vectorized point-in-polygon test.
locs = candidates[mask, :][:200, :]

Declare the terms of the spatial trend:

In [None]:
def trend_terms(x, y):
    """Return the spatial trend terms for a location: just the x coordinate.

    `y` is accepted but not used, so the trend depends on x only.
    """
    return x

Create a featurizer that the Gaussian process class `GP` will use to convert locations into trend features:

In [None]:
# Build the featurizer from the trend definition and the sampled locations.
# NOTE(review): the class name suggests `locs` is used to normalize the trend
# features — confirm against the geostat documentation.
featurizer = NormalizingFeaturizer(trend_terms, locs)

Instantiate a `GP` and immediately call `generate` to generate synthetic observations.
  * `parameters` holds the geostatistical parameters for the `GP`.
  * `alpha` parameterizes the normal distribution prior for trend coefficients. 

In [None]:
# Parameters of the "true" process from which the synthetic data is drawn.
true_parameters = dict(range=2000, sill=1., nugget=0.25)

generator = GP(
    featurizer=featurizer,
    covariance_func='squared-exp',
    parameters=true_parameters,
    hyperparameters=dict(alpha=0.2),
    verbose=True,
)
obs = generator.generate(locs)

# Colour limits reused by the data plot and the prediction-mean plots.
vmin = obs.min()
vmax = obs.max()

When the data is plotted, you can see an overall trend with some localized variations.

In [None]:
# Scatter the synthetic observations over the watershed outline and imagery.
fig, ax = pp.subplots(figsize=(7, 7), dpi=120)

# Colour each location by its observed value on the shared [vmin, vmax] scale.
points = ax.scatter(locs[:, 0], locs[:, 1], c=obs, vmin=vmin, vmax=vmax)

berkeleydf.plot(ax=ax, fc='none', ec='black', lw=1)
fig.colorbar(points, ax=ax, shrink=0.7)

# USGS imagery tiles as a backdrop, requested in the outline's CRS.
tile_url = ('https://basemap.nationalmap.gov/arcgis/rest/services/'
            'USGSImageryOnly/MapServer/tile/{z}/{y}/{x}')
ctx.add_basemap(ax, attribution=False, crs=berkeleydf.crs.to_string(), source=tile_url)

ax.set_title('Synthetic data')
fig.tight_layout()
pp.show()

# Inferring parameters

Now we create a second `GP`. This time we pass in the data (`locs` and `obs`) and it will fit the geospatial parameters to the data. Here `parameters` holds the initial geospatial parameters, which are different from those in the first `GP`, but after fitting they converge to something close.

In [None]:
# Fit the geostatistical parameters to the synthetic data, starting from
# initial values that differ from the ones used for generation.
# `np.ptp(obs)` is used instead of `obs.ptp()` because the ndarray method
# was removed in NumPy 2.0; the function form works on old and new NumPy.
gp = GP(featurizer = featurizer,
        covariance_func = 'squared-exp',
        parameters = dict(range=1000.0, sill=0.5, nugget=0.5),
        hyperparameters = dict(alpha=np.ptp(obs)**2, reg=0, train_iters=300),
        verbose=True).fit(locs, obs)

# Generating predictions in convex hull

Create a mesh using a convex hull for making predictions.

In [None]:
# Build the prediction mesh from the convex hull of the observation locations.
# NOTE(review): `nx=200` presumably sets the mesh resolution along x — confirm.
mesh = Mesh.from_convex_hull(locs, nx=200)

Call `gp.predict` to get predictions at the mesh locations:

In [None]:
# Predict at every mesh location, passing the observed (locs, obs) pairs as data.
# The two results are the prediction mean and variance plotted in the cells below.
mean, var = gp.predict(locs, obs, mesh.locations())

Create a slice for prediction mean and plot:

In [None]:
# Arrange the per-location prediction means as mesh arrays for plotting.
meshx, meshy, value = mesh.slice(mean)

fig, ax = pp.subplots(figsize=(7, 7), dpi=120)

# Same colour limits as the synthetic-data plot, so the two are comparable.
mappable = ax.pcolormesh(meshx, meshy, value, vmin=vmin, vmax=vmax)

berkeleydf.plot(ax=ax, fc='none', ec='black', lw=1)
fig.colorbar(mappable, ax=ax, shrink=0.7)

# USGS imagery tiles as a backdrop, requested in the outline's CRS.
tile_url = ('https://basemap.nationalmap.gov/arcgis/rest/services/'
            'USGSImageryOnly/MapServer/tile/{z}/{y}/{x}')
ctx.add_basemap(ax, attribution=False, crs=berkeleydf.crs.to_string(), source=tile_url)

ax.set_title('Prediction mean')
fig.tight_layout()
pp.show()

Do the same for prediction variance:

In [None]:
# Arrange the per-location prediction variances as mesh arrays for plotting.
meshx, meshy, value = mesh.slice(var)

fig, ax = pp.subplots(figsize=(7, 7), dpi=120)

# Variance gets its own colour map and automatic colour limits.
mappable = ax.pcolormesh(meshx, meshy, value, cmap='gist_heat_r')

berkeleydf.plot(ax=ax, fc='none', ec='black', lw=1)
fig.colorbar(mappable, ax=ax, shrink=0.7)

# USGS imagery tiles as a backdrop, requested in the outline's CRS.
tile_url = ('https://basemap.nationalmap.gov/arcgis/rest/services/'
            'USGSImageryOnly/MapServer/tile/{z}/{y}/{x}')
ctx.add_basemap(ax, attribution=False, crs=berkeleydf.crs.to_string(), source=tile_url)

ax.set_title('Prediction variance')
fig.tight_layout()
pp.show()

# Generating predictions in arbitrary shape

Now instead, create a mesh using the shape for Berkeley watersheds to make predictions.

In [None]:
# Rebuild the mesh from the watershed polygon itself.
# This rebinds `mesh`, so the convex-hull mesh from earlier is no longer available.
mesh = Mesh.from_polygon(berkeley, nx=200)

Make predictions:

In [None]:
# Re-run prediction on the polygon mesh; `mean` and `var` are overwritten.
mean, var = gp.predict(locs, obs, mesh.locations())

Create a slice for prediction mean and plot:

In [None]:
# Arrange the per-location prediction means as mesh arrays for plotting.
meshx, meshy, value = mesh.slice(mean)

fig, ax = pp.subplots(figsize=(7, 7), dpi=120)

# Same colour limits as the synthetic-data plot, so the two are comparable.
mappable = ax.pcolormesh(meshx, meshy, value, vmin=vmin, vmax=vmax)

# Contour lines: replace values at mesh nodes outside the watershed with NaN
# before contouring, so only values inside the shape are contoured.
inside = shv.contains(berkeleydf.geometry.item(), meshx, meshy)
value_inside = np.where(inside, value, np.nan)
ax.contour(meshx, meshy, value_inside, colors='k', linewidths=0.5, alpha=0.8)

berkeleydf.plot(ax=ax, fc='none', ec='black', lw=1)
fig.colorbar(mappable, ax=ax, shrink=0.7)

# USGS imagery tiles as a backdrop, requested in the outline's CRS.
tile_url = ('https://basemap.nationalmap.gov/arcgis/rest/services/'
            'USGSImageryOnly/MapServer/tile/{z}/{y}/{x}')
ctx.add_basemap(ax, attribution=False, crs=berkeleydf.crs.to_string(), source=tile_url)

ax.set_title('Prediction mean')
fig.tight_layout()
pp.show()

Do the same for prediction variance:

In [None]:
# Arrange the per-location prediction variances as mesh arrays for plotting.
meshx, meshy, value = mesh.slice(var)

fig, ax = pp.subplots(figsize=(7, 7), dpi=120)

# Variance gets its own colour map and automatic colour limits.
mappable = ax.pcolormesh(meshx, meshy, value, cmap='gist_heat_r')

berkeleydf.plot(ax=ax, fc='none', ec='black', lw=1)
fig.colorbar(mappable, ax=ax, shrink=0.7)

# USGS imagery tiles as a backdrop, requested in the outline's CRS.
tile_url = ('https://basemap.nationalmap.gov/arcgis/rest/services/'
            'USGSImageryOnly/MapServer/tile/{z}/{y}/{x}')
ctx.add_basemap(ax, attribution=False, crs=berkeleydf.crs.to_string(), source=tile_url)

ax.set_title('Prediction variance')
fig.tight_layout()
pp.show()