## PDE 3 - Advection-Diffusion


#### Problem Setup

$f(x,y,t) = au_x + bu_y + cu_{xx} + du_{yy} - u_t$

For the generation of our initial data samples we use:

$u: \mathbb{R}^3 \rightarrow \mathbb{R}, \; u(x,y,t) = e^{2x} + 3y - t$ <br>
$f: \mathbb{R}^3 \rightarrow \mathbb{R}, \;f(x,y,t) = 5$ <br>
$X_i := (x_i, y_i, t_i) \in [0,1] \times [0,1] \times [0, 0.135] \subseteq \mathbb{R}^3$ for $i \in \{1, \dotsc, n\}$

and our known function values will be $\{u(x_i,y_i), f(x_i,y_i)\}_{i \in \{1, \dotsc, n\}}$.

We assume that $u$ can be represented as a Gaussian process with SE kernel.

$u \sim \mathcal{GP}(0, k_{uu}(X_i, X_j; \theta))$, where $\theta = \{\sigma, l_x, l_y, l_t\}$.

Set the linear operator to:

$\mathcal{L}_X^{\phi} := a\partial_x + b\partial_y + c\partial_{x,x} + d\partial_{y,y} - \partial_t$

so that

$\mathcal{L}_X^{\phi} u = f$

Problem at hand: Estimate $\phi:=\{a, b\}$ with $\{c,d\} = \{0.5, 0.5\}$ fixed (we expect $a = -1, b = 4/3$).


#### Step 1: Simulate data

In [28]:
import time
import numpy as np
import sympy as sp
import numpy.fft as fft
from scipy.linalg import solve_triangular
import scipy.optimize as opt

In [29]:
# Global variables: x, y, t, n, y_u, y_f, s

# Number of data samples
n = 80

# Noise of our data:
s = 1e-7

In [30]:
def simulate_data():
    # np.random.seed(20)
    x = np.random.rand(n)
    y = np.random.rand(n)
    t = np.array([0.015*np.random.randint(10) for i in range(n)])
    y_u = np.exp(2*x) + 3*y - t
    y_f = 5*np.ones(n)
    return (x,y,t,y_u,y_f)
(x,y,t,y_u,y_f) = simulate_data()

#### Step 2: Evaluate kernels

$k_{uu}(X_i, X_j; \theta) = \sigma \cdot exp(-\frac{1}{2l_x}(x_i-x_j)^2 - \frac{1}{2l_y}(y_i-y_j)^2 - \frac{1}{2l_t}(t_i-t_j)^2)$

In [31]:
x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d = sp.symbols('x_i x_j y_i y_j t_i t_j \
                                                                         sigma l_x l_y l_t a b c d')
kuu_sym = sigma*sp.exp(-1/(2*l_x)*((x_i - x_j)**2) - 1/(2*l_y)*((y_i - y_j)**2) 
                       - 1/(2*l_t)*((t_i - t_j)**2))
kuu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t), kuu_sym, "numpy")
def kuu(x, y, t, sigma, l_x, l_y, l_t):
    k = np.zeros((x.size, x.size))
    for i in range(x.size):
        for j in range(x.size):
            k[i,j] = kuu_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t)
    return k

$k_{ff}(X_i,X_j;\theta,\phi)$ <br>
$= \mathcal{L}_{X_i}^{\phi} \mathcal{L}_{X_j}^{\phi} k_{uu}(X_i, X_j; \theta)$ <br>
$= a^2\partial_{x_i, x_j}k_{uu} + ab \partial_{x_i, y_j}k_{uu} + ac \partial_{x_i, x_j, x_j}k_{uu} + ad \partial_{x_i, y_j, y_j}k_{uu} - a \partial_{x_i, t_j}k_{uu}$ <br>
$+ ba\partial_{y_i, x_j}k_{uu} + b^2\partial_{y_i, y_j}k_{uu} + bc\partial_{y_i, x_j, x_j}k_{uu} + bd\partial_{y_i, y_j, y_j}k_{uu} - b\partial_{y_i, t_j}k_{uu}$ <br>
$+ ca\partial_{x_i, x_i, x_j}k_{uu}+ cb\partial_{x_i, x_i, y_j}k_{uu}+ c^2\partial_{x_i, x_i, x_j, x_j}k_{uu}+ cd\partial_{x_i, x_i, y_j, y_j}k_{uu}- c\partial_{x_i, x_i, t_j}k_{uu}$ <br>
$+ da\partial_{y_i, y_i,x_j}k_{uu}+ db\partial_{y_i, y_i, y_j}k_{uu}+ dc\partial_{y_i, y_i, x_j, x_j}k_{uu}+ d^2\partial_{y_i, y_i, y_j, y_j}k_{uu}- d\partial_{y_i, y_i, t_j}k_{uu}$ <br>
$- a\partial_{t_i, x_j}k_{uu}- b\partial_{t_i, y_j}k_{uu}- c\partial_{t_i, x_j, x_j}k_{uu}- d\partial_{t_i, y_j, y_j}k_{uu}+ \partial_{t_i, t_j}k_{uu}$

In [32]:
kff_sym = a**2*sp.diff(kuu_sym, x_i, x_j) \
        + a*b*sp.diff(kuu_sym, x_i, y_j) \
        + a*c*sp.diff(kuu_sym, x_i, x_j, x_j) \
        + a*d*sp.diff(kuu_sym, x_i, y_j, y_j) \
        - a*sp.diff(kuu_sym, x_i, t_j) \
        + b*a*sp.diff(kuu_sym, y_i, x_j) \
        + b**2*sp.diff(kuu_sym, y_i, y_j) \
        + b*c*sp.diff(kuu_sym, y_i, x_j, x_j) \
        + b*d*sp.diff(kuu_sym, y_i, y_j, y_j) \
        - b*sp.diff(kuu_sym, y_i, t_j) \
        + c*a*sp.diff(kuu_sym, x_i, x_i, x_j) \
        + c*b*sp.diff(kuu_sym, x_i, x_i, y_j) \
        + c**2*sp.diff(kuu_sym, x_i, x_i, x_j, x_j) \
        + c*d*sp.diff(kuu_sym, x_i, x_i, y_j, y_j) \
        - c*sp.diff(kuu_sym, x_i, x_i, t_j) \
        + d*a*sp.diff(kuu_sym, y_i, y_i, x_j) \
        + d*b*sp.diff(kuu_sym, y_i, y_i, y_j) \
        + d*c*sp.diff(kuu_sym, y_i, y_i, x_j, x_j) \
        + d**2*sp.diff(kuu_sym, y_i, y_i, y_j, y_j) \
        - d*sp.diff(kuu_sym, y_i, y_i, t_j) \
        - a*sp.diff(kuu_sym, t_i, x_j) \
        - b*sp.diff(kuu_sym, t_i, y_j) \
        - c*sp.diff(kuu_sym, t_i, x_j, x_j) \
        - d*sp.diff(kuu_sym, t_i, y_j, y_j) \
        + sp.diff(kuu_sym, t_i, t_j)
kff_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d), kff_sym, "numpy")
def kff(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
    k = np.zeros((x.size, x.size))
    for i in range(x.size):
        for j in range(x.size):
            k[i,j] = kff_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t, a,b,c,d)
    return k

$k_{fu}(X_i,X_j;\theta,\phi) \\
= \mathcal{L}_{X_i}^{\phi} k_{uu}(X_i, X_j; \theta) \\
= a\partial_{x_i}k_{uu} + b \partial_{y_i}k_{uu} + c \partial_{x_i, x_i}k_{uu} + d \partial_{y_i, y_i}k_{uu} -  \partial_{t_i}k_{uu}$

In [33]:
kfu_sym = a*sp.diff(kuu_sym, x_i) \
        + b*sp.diff(kuu_sym, y_i) \
        + c*sp.diff(kuu_sym, x_i, x_i) \
        + d*sp.diff(kuu_sym, y_i, y_i) \
        - sp.diff(kuu_sym, t_i)
kfu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d), kfu_sym, "numpy")
def kfu(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
    k = np.zeros((x.size, x.size))
    for i in range(x.size):
        for j in range(x.size):
            k[i,j] = kfu_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t, a,b,c,d)
    return k

In [34]:
def kuf(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
    return kfu(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d).T

#### Step 3: Compute NLML (with Cholesky decomposition)

Implementing the covariance matrix K and its inverse

In [35]:
def K(sigma, l_x, l_y, l_t, a,b,c,d, s):
    K_mat = np.block([
        [kuu(x, y, t, sigma, l_x, l_y, l_t)+s*np.eye(n),kuf(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d)],
        [kfu(x, y, t, sigma, l_x, l_y, l_t,a,b,c,d),kff(x,y,t,sigma,l_x,l_y,l_t,a,b,c,d)+s*np.eye(n)]
    ])
    return K_mat

In [36]:
def K_inv_and_det(sigma, l_x, l_y, l_t, a,b,c,d, s):
    
    K_inv = np.zeros((2*n, 2*n))
    log_sum = 0
    
    # Use Cholesky, if possible. Otherwise use SVD.
        
    try:
        L = np.linalg.cholesky(K(sigma, l_x, l_y, l_t, a,b,c,d, s))
        L_inv = solve_triangular(L, np.identity(2*n), lower=True) # Slight performance boost 
                                                                  # over np.linalg.inv
        K_inv = (L_inv.T).dot(L_inv)

        for i in range(2*n):
            log_sum = log_sum + np.log(np.abs(L[i,i]))
    except np.linalg.LinAlgError:
        # Inverse of K via SVD
        u, s_mat, vt = np.linalg.svd(K(sigma, l_x, l_y, l_t, a,b,c,d, s))
        K_inv = (vt.T).dot(np.linalg.inv(np.diag(s_mat))).dot(u.T)  

        # Calculating the log of the determinant of K
        # Singular values are always positive.
        for i in range(s_mat.size):
            log_sum = log_sum + np.log(s_mat[i])
        
    return K_inv, log_sum

Implementing normalized negative log-likelihood function

In [37]:
def nlml(params):
    
    c_par = 0.5
    d_par = 0.5
    
    # Exponentiation to enable unconstrained optimization
    sigma_exp = np.exp(params[0]) 
    l_x_exp = np.exp(params[1])
    l_y_exp = np.exp(params[2]) 
    l_t_exp = np.exp(params[3])
    # a = params[4]
    y_con = np.concatenate((y_u, y_f))
    
    A,b = K_inv_and_det(sigma_exp, l_x_exp, l_y_exp, l_t_exp, params[4],params[5],c_par,d_par, s)
        
    val = b + y_con @ A @ y_con
    return val

#### Step 4: Optimize hyperparameters

**1. Nelder-Mead**

In [38]:
def callbackf(params):
    print(params)

In [39]:
def minimize_restarts(x,y,y_u,y_f,n=5): 
    all_results = []
    for it in range(0,n):
        all_results.append(opt.minimize(nlml, np.random.rand(6), callback = callbackf, 
                                        method="Nelder-Mead", 
                                        options={'maxfev':10000, 'fatol': 0.001}))
    return min(all_results, key = lambda x: x.fun)

In [None]:
t0 = time.time()
m = minimize_restarts(x, y, y_u, y_f, 3)
t_Nelder = time.time() - t0
print(m)

In [44]:
t_Nelder

10960.663992643356

In [45]:
print('The inferred parameters are:')
print('a = ', m.x[4])
print('b = ', m.x[5])

The inferred parameters are:
a =  -0.9999297756402357
b =  1.333116719690449
