# Interpolation

* linear, splines
* LOESS (from before)
* griddata
* warning
* fit to model

* https://docs.scipy.org/doc/scipy/reference/interpolate.html
* https://docs.scipy.org/doc/scipy/tutorial/interpolate.html

## 1D interpolation

### Simplest: linear

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def f1(x):
    """Return sin(x + pi/5), the smooth test function sampled in the 1D examples.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    phase_shift = np.pi / 5
    return np.sin(x + phase_shift)


# Generate sparse data (a very simple example): sample f1 at 10 evenly
# spaced points on [0, 2*pi].
datax = np.linspace(0, 2 * np.pi, num=10)
datay = f1(datax)

# Show the raw samples as blue circles
plt.plot(datax, datay, "bo")
plt.show()

In [None]:
# Dense evaluation grid covering the same interval as the samples
x = np.linspace(0, 2 * np.pi, num=100)
# Piecewise-linear interpolation of the samples onto the dense grid
y = np.interp(x, xp=datax, fp=datay)

# Compare: samples, linear interpolant, and the function the samples came from
plt.plot(datax, datay, "bo", label="data")
plt.plot(x, y, color="orange", label="interpolation (linear)")
plt.plot(x, f1(x), "k--", label="'true' function")
plt.legend()
plt.show()

### Cubic splines - very common method that works well
* See documentation for more details, options, methods etc.
* https://docs.scipy.org/doc/scipy/reference/interpolate.html

In [None]:
from scipy.interpolate import CubicSpline

# Build a cubic spline through the samples; the result is callable on new x values
spline_fit = CubicSpline(datax, datay)

# Overlay the linear and cubic-spline interpolants on the data and true curve
plt.plot(datax, datay, "bo", label="data")
plt.plot(x, y, color="orange", label="interpolation (linear)")
plt.plot(x, spline_fit(x), color="red", label="interpolation (cubic splines)")
plt.plot(x, f1(x), "k--", label="'true' function")
plt.legend()
plt.show()

In [None]:
from scipy.interpolate import CubicSpline

# Evaluation grid that deliberately extends beyond the sampled range [0, 2*pi]
x2 = np.linspace(-np.pi, 3 * np.pi, num=100)

# Both interpolants are built from the same samples, then evaluated on x2
y2 = np.interp(x2, xp=datax, fp=datay)
spline_fit = CubicSpline(datax, datay)

plt.plot(datax, datay, "bo", label="data")
plt.plot(x2, y2, color="orange", label="interpolation (linear)")
plt.plot(x2, spline_fit(x2), color="red", label="interpolation (cubic splines)")
plt.plot(x2, f1(x2), "k--", label="'true' function")
plt.title("Be careful extrapolating")
plt.legend()
plt.show()

In [None]:
# A more complicated function
def f2(x):
    """Return sin(x + pi/4) * exp(-(x - 3)**2 / 5) * cos(4x).

    A more oscillatory test function: a shifted sine, damped by a Gaussian
    envelope centred on x = 3, modulated by a faster cosine. Works
    element-wise on NumPy arrays as well as on scalars.
    """
    shifted_sine = np.sin(x + np.pi / 4)
    envelope = np.exp(-((x - 3) ** 2) / 5)
    fast_cosine = np.cos(4 * x)
    return shifted_sine * envelope * fast_cosine


# Sample the more complicated function at the same sparse x positions
data2 = f2(datax)

# Evaluation grid reaching 0.5 beyond each end of the sampled interval
x3 = np.linspace(-0.5, 2 * np.pi + 0.5, num=100)

spline_fit = CubicSpline(datax, data2)
linear_y = np.interp(x3, xp=datax, fp=data2)

plt.plot(datax, data2, "bo", label="data")
plt.plot(x3, linear_y, color="orange", label="interpolation (linear)")
plt.plot(x3, spline_fit(x3), color="red", label="interpolation (cubic splines)")
plt.plot(x3, f2(x3), "k--", label="'true' function")
plt.title("Not magic: be generally careful!")
plt.legend()
plt.show()

* If there's a good theoretical model to use, then it is often better to fit the data to the model (see the examples from a few weeks back about model fitting)

-----
## Interpolation as smoothing

* Normally, interpolation passes through all the data points
* If data is noisy, that's probably not what you want!
* When we have lots of data, the savgol-type (~LOESS) smoothing methods work well (see week 4). But if there's not too much data, spline smoothing works well

In [None]:
# 50 sample positions on [0, 2*pi]
datax2 = np.linspace(0.0, 2 * np.pi, num=50)
# Gaussian noise (mean 0, standard deviation 0.07), one value per sample
noise = np.random.normal(loc=0.0, scale=0.07, size=len(datax2))
# Noisy measurements of f2
datay2 = f2(datax2) + noise

plt.plot(datax2, datay2, "bo", label="data")
plt.plot(x, f2(x), "k--", label="'true' function")
plt.legend()
plt.show()

In [None]:
from scipy.interpolate import splrep, BSpline

# Two B-spline representations of the noisy data:
#   s=0 -> no smoothing, the spline interpolates every data point
#   s=1 -> smoothing spline, allowed to miss the points to reduce wiggles
tck = splrep(datax2, datay2, s=0)
tck_1 = splrep(datax2, datay2, s=1)

# Turn each (knots, coefficients, degree) tuple into a callable spline
unsmoothed_spline = BSpline(*tck)
smoothed_spline = BSpline(*tck_1)

plt.plot(datax2, datay2, "bo", label="data")
plt.plot(x, unsmoothed_spline(x), color="orange", label="No smoothing")
plt.plot(x, smoothed_spline(x), color="red", label="With s=1 smoothing")
plt.plot(x, f2(x), "k--", label="'true' function")
plt.legend()
plt.show()

This can be better than the savgol (~LOESS) smoothing we used before in the case where there isn't too much data.
The savgol filter still works better when there's lots of data (see week 4)

In [None]:
from scipy import signal

# Savitzky-Golay smoothing of the noisy samples, for comparison with the
# smoothing spline.
# NOTE(review): some SciPy releases require an odd window_length -- confirm
# that the even value used here is accepted by the installed version.
y_smooth = signal.savgol_filter(datay2, window_length=4, polyorder=2, mode="nearest")

plt.plot(datax2, datay2, "bo", label="data")
plt.plot(datax2, y_smooth, c="orange", label="savgol_filter")
# Plot the *smoothed* spline (tck_1, built with s=1). tck was built with s=0,
# i.e. no smoothing at all, so it would not match the "spline smoothing" label.
plt.plot(x, BSpline(*tck_1)(x), c="red", label="spline smoothing")
plt.plot(x, f2(x), "k--", label="'true' function")
plt.legend()
plt.show()

-------------
## Interpolating multi-dimensional data

Two cases: 
* regular data (uniformly-spaced grid)
    * https://docs.scipy.org/doc/scipy/tutorial/interpolate/ND_regular_grid.html
    * https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.RegularGridInterpolator.html
* irregular data (non-uniformly-spaced grid)
    * https://docs.scipy.org/doc/scipy/tutorial/interpolate/ND_unstructured.html
    * https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html


### Regular data (known on a uniformly-spaced grid)

In [None]:
from scipy.interpolate import RegularGridInterpolator


# Example function
def F(u, v):
    """Return u*cos(u*v) + v*sin(u*v), the 2D test function for the grid examples.

    Works element-wise on NumPy arrays (e.g. meshgrid output) and on scalars.
    """
    uv = u * v
    return u * np.cos(uv) + v * np.sin(uv)


# Coarse sampling axes on [0, 3]: 8 points along x, 11 along y
xgrid = np.linspace(0, 3.0, num=8)
ygrid = np.linspace(0, 3.0, num=11)

# "Sampled" data: F evaluated on the coarse grid. With "xy" indexing (the
# meshgrid default) the arrays have shape (len(ygrid), len(xgrid)).
Xs, Ys = np.meshgrid(xgrid, ygrid, indexing="xy")
Zs = F(Xs, Ys)

In [None]:
# Fine 100 x 100 grid on which the "true" function is evaluated
x = np.linspace(0, 3.0, num=100)
y = np.linspace(0, 3.0, num=100)
X, Y = np.meshgrid(x, y)
Z = F(X, Y)

# Two stacked panels: the finely resolved function and the coarse samples
fig, (ax1, ax2) = plt.subplots(nrows=2)

ax1.pcolormesh(X, Y, Z)
ax1.set_title("True function")

ax2.pcolormesh(Xs, Ys, Zs)
ax2.set_title("Finite-sampled")

fig.tight_layout()
plt.show()

In [None]:
# Form the interpolation. Note the transpose: np.meshgrid's default "xy"
# indexing gives Zs the shape (len(ygrid), len(xgrid)), while the interpolator
# is given the axes in (x, y) order and so needs values indexed as [x, y].
interp = RegularGridInterpolator((xgrid, ygrid), Zs.T)

# Create an (N, 2) array of (x, y) points to interpolate onto
test_points = np.array([X.flatten(), Y.flatten()]).T

# One panel per interpolation method, plus the true function in the last panel
fig, axes = plt.subplots(2, 3, figsize=(10, 6))
axes = axes.flatten()
for i, method in enumerate(["nearest", "linear", "slinear", "cubic", "quintic"]):
    # The points were flattened from X, so restore X's own shape,
    # (len(y), len(x)). reshape(len(x), len(y)) is only correct when the two
    # lengths happen to be equal.
    im = interp(test_points, method=method).reshape(X.shape)
    axes[i].pcolormesh(X, Y, im)
    axes[i].set_title(method)
axes[-1].pcolormesh(X, Y, Z)
axes[-1].set_title("True function")
fig.tight_layout()
plt.show()

-----
## non-uniform grid data

* Very often, we have data in the form:
```
x1 y1 z1
x2 y2 z2
x3 y3 z3
...
xN yN zN
```

Where the spacing between each of the x's (and y's etc.) is not always the same, and the points are not even necessarily in order!

In [None]:
# Define some 2D function as an example:
def f3(x, y):
    """Return 2 - 2*cos(x)*cos(y) - cos(x - 2y), the scattered-data test function.

    Works element-wise on NumPy arrays (e.g. meshgrid output) and on scalars.
    """
    product_term = 2 * np.cos(x) * np.cos(y)
    mixed_term = np.cos(x - 2 * y)
    return 2 - product_term - mixed_term

In [None]:
# Generate a data set by randomly sampling the function num_samples times
# Simulates real, non-uniform, non-ordered data

# To make randomness reproducible
np.random.seed(19680801)

# Sampling window: x in [-pi, pi], y in [0, 2*pi]
xmin, xmax = -np.pi, np.pi
ymin, ymax = 0, 2 * np.pi
num_samples = 250
data = []
for i in range(num_samples):
    # Draw one random (x, y) location and evaluate the function there
    x = np.random.uniform(xmin, xmax)
    y = np.random.uniform(ymin, ymax)
    z = f3(x, y)
    data.append([x, y, z])
# Convert to a 2D array with one (x, y, z) row per sample
data = np.array(data)

print(data)

In [None]:
# Just for convenience: split the (x, y, z) rows into one array per column
xdata, ydata, zdata = data.T

# Scatter the sample locations, coloured by the sampled function value
plt.scatter(xdata, ydata, marker="x", c=zdata, cmap="coolwarm")
plt.colorbar()
plt.show()

In [None]:
from scipy.interpolate import griddata

# Get the sample locations as an array of 2D points (first two columns)
xy_data = data[:, :2]
# Equivalent to:
# xy_data = np.column_stack((xdata, ydata))

# Bounding box of the sample points
xi, xf = xdata.min(), xdata.max()
yi, yf = ydata.min(), ydata.max()

# Fine 100 x 100 grid spanning the bounding box
xs = np.linspace(xi, xf, num=100)
ys = np.linspace(yi, yf, num=100)
X, Y = np.meshgrid(xs, ys)

# Interpolate the scattered data onto the fine grid, once per method
Z_near, Z_lin, Z_cub = (
    griddata(xy_data, zdata, (X, Y), method=kind)
    for kind in ("nearest", "linear", "cubic")
)

# "True" function, just for comparison
Z_true = f3(X, Y)

In [None]:
import matplotlib.cm as cm
from matplotlib.colors import Normalize

# layout="constrained" alone handles the spacing and reserves room for the
# shared colorbar. fig.tight_layout() is deliberately not called here: it would
# replace the constrained layout engine (Matplotlib warns when they are mixed).
fig, axs = plt.subplots(2, 2, figsize=(10, 6), layout="constrained")

axs = axs.flatten()

# Use the same colormap for all sub-plots:
# (This time, it would be close enough without bothering, but not always the case!)
normalizer = Normalize(zdata.min(), zdata.max())
im = cm.ScalarMappable(norm=normalizer, cmap="cividis")
# A single colorbar shared by all four panels
fig.colorbar(im, ax=axs)

axs[0].scatter(xdata, ydata, marker="x", c=zdata, cmap="cividis", norm=normalizer)
axs[0].set_title("Sampled data")

axs[1].pcolormesh(X, Y, Z_true, cmap="cividis", norm=normalizer)
axs[1].set_title("True function")

axs[2].pcolormesh(X, Y, Z_near, cmap="cividis", norm=normalizer)
axs[2].set_title("Interpolation (nearest)")

axs[3].pcolormesh(X, Y, Z_lin, cmap="cividis", norm=normalizer)
axs[3].set_title("Interpolation (linear)")


plt.show()

In [None]:
# Two stacked panels comparing contouring of gridded vs. scattered data
fig, (ax1, ax2) = plt.subplots(2, 1)

# Top: contours of the linearly interpolated data on the regular fine grid
ax1.set_title("Contours, using interp'd data")
ax1.contour(X, Y, Z_lin, colors="k")
cntr1 = ax1.contourf(X, Y, Z_lin, cmap="cividis")
fig.colorbar(cntr1, ax=ax1)

# Bottom: contours drawn directly from the scattered samples
ax2.set_title("Contours, using tricontour")
ax2.tricontour(xdata, ydata, zdata, colors="k")
cntr2 = ax2.tricontourf(xdata, ydata, zdata, cmap="cividis")
# Uncomment to overlay the sample locations:
# ax2.scatter(xdata, ydata, marker="x", color="k", alpha=0.2)
fig.colorbar(cntr2, ax=ax2)

fig.tight_layout()
plt.show()