
Commit

Added GPR tutorial.
Weizhe-Chen committed Aug 30, 2023
1 parent 2a230a8 commit 43ef102
Showing 6 changed files with 354 additions and 48 deletions.
1 change: 0 additions & 1 deletion docs/get_started.md

This file was deleted.

24 changes: 24 additions & 0 deletions docs/tutorials/gaussian_process_regression.md
@@ -0,0 +1,24 @@
# Gaussian Process Regression

## Prediction

Given $N$ training inputs $\mathbf{X}\in\mathbb{R}^{N\times{D}}$ and training targets $\mathbf{y}\in\mathbb{R}^{N}$, the posterior predictive distribution of the latent function value $f_{\star}$ at a test input $\mathbf{x}^{\star}$ has a closed-form expression:

$$
\begin{aligned}
p(f_{\star}\rvert\mathbf{y})&=\mathcal{N}(f_{\star}\rvert\mu,\nu),\\
\mu&=\mathbf{k}_{\star}^{\top}\mathbf{K}_{y}^{-1}\mathbf{y},\\
\nu&=k_{\star\star}-\mathbf{k}_{\star}^{\top}\mathbf{K}_{y}^{-1}\mathbf{k}_{\star},
\end{aligned}
$$

where $\mathbf{k}_{\star}$ is the vector of kernel values between the training inputs $\mathbf{X}$ and the test input $\mathbf{x}^{\star}$, $\mathbf{K}_{y}$ is shorthand for $\mathbf{K}_{\mathbf{x}}+\sigma^{2}\mathbf{I}$, $\mathbf{K}_{\mathbf{x}}$ is the covariance matrix given by the kernel function evaluated at every pair of training inputs, $\sigma^{2}$ is the observation-noise variance, and $k_{\star\star}\triangleq\mathtt{k}(\mathbf{x}^{\star},\mathbf{x}^{\star})$.
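
The following is a minimal NumPy sketch of these equations. The RBF kernel, the `gpr_predict` helper, and the default noise variance are illustrative assumptions made for the sketch, not PyPolo's API.

```python
import numpy as np

def rbf_kernel(a, b, lengthscale=1.0):
    # Squared-exponential kernel evaluated at every pair of rows in a and b.
    sq_dists = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * sq_dists / lengthscale ** 2)

def gpr_predict(x_train, y_train, x_test, noise_var=0.1):
    K_x = rbf_kernel(x_train, x_train)              # K_x
    K_y = K_x + noise_var * np.eye(len(x_train))    # K_y = K_x + sigma^2 I
    k_star = rbf_kernel(x_train, x_test)            # k_* for each test input
    alpha = np.linalg.solve(K_y, y_train)           # K_y^{-1} y
    mu = k_star.T @ alpha                           # predictive mean
    v = np.linalg.solve(K_y, k_star)                # K_y^{-1} k_*
    k_ss = rbf_kernel(x_test, x_test).diagonal()    # k_** for each test input
    nu = k_ss - np.sum(k_star * v, axis=0)          # predictive variance
    return mu, nu
```

For example, `gpr_predict(X, y, X_new)` returns the predictive mean and variance at each row of `X_new`.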

## Learning

Optimizing the hyperparameters, a process known as model selection, is common practice for obtaining better predictions.
Model selection is typically implemented by maximizing the model evidence, also known as the log marginal likelihood:

$$
\ln{p(\mathbf{y}\rvert\boldsymbol{\psi})}=\frac{1}{2}(\underbrace{-\mathbf{y}^{\top}\mathbf{K}_{y}^{-1}\mathbf{y}}_{\text{quadratic term}}-\underbrace{\ln{\mathrm{det}(\mathbf{K}_{y})}}_{\text{logdet term}}-\underbrace{N\ln(2\pi)}_{\text{constant term}}),
$$

where $\boldsymbol{\psi}$ collects the kernel hyperparameters and the noise variance $\sigma^{2}$.
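
Below is a minimal, self-contained PyTorch sketch of this objective together with a gradient-based optimization loop, in the spirit of `GPRModel.learn`. The RBF kernel, the toy data, and the variable names are assumptions made for the sketch rather than PyPolo's API.

```python
import torch

def log_marginal_likelihood(x, y, log_lengthscale, log_noise_var, jitter=1e-6):
    # RBF kernel matrix K_x with a learnable length-scale.
    lengthscale = log_lengthscale.exp()
    K_x = torch.exp(-0.5 * torch.cdist(x, x) ** 2 / lengthscale ** 2)
    K_y = K_x + (log_noise_var.exp() + jitter) * torch.eye(len(x))
    L = torch.linalg.cholesky(K_y)
    alpha = torch.cholesky_solve(y.unsqueeze(-1), L)          # K_y^{-1} y
    quad = (y.unsqueeze(-1) * alpha).sum()                    # y^T K_y^{-1} y
    logdet = 2.0 * torch.diagonal(L).log().sum()              # ln det(K_y)
    const = len(x) * torch.log(torch.tensor(2.0 * torch.pi))  # N ln(2 pi)
    return 0.5 * (-quad - logdet - const)

# Toy one-dimensional data.
x_train = torch.linspace(0.0, 1.0, 20).unsqueeze(-1)
y_train = torch.sin(2.0 * torch.pi * x_train).squeeze(-1) + 0.1 * torch.randn(20)

# Maximize the evidence by minimizing its negative with Adam.
log_lengthscale = torch.zeros((), requires_grad=True)
log_noise_var = torch.full((), -2.3, requires_grad=True)  # ln(0.1)
optimizer = torch.optim.Adam([log_lengthscale, log_noise_var], lr=0.01)
for _ in range(100):
    optimizer.zero_grad()
    loss = -log_marginal_likelihood(x_train, y_train, log_lengthscale, log_noise_var)
    loss.backward()
    optimizer.step()
```

After training, the optimized length-scale and noise variance can be plugged into the predictive equations above.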
3 changes: 2 additions & 1 deletion mkdocs.yml
@@ -7,7 +7,8 @@ copyright: Copyright © 2022 - 2023 PyPolo Developers
nav:
- Introduction: index.md
- Installation: installation.md
- Get Started: get_started.md
- Tutorials:
- Gaussian Process Regression: tutorials/gaussian_process_regression.md
- Resources:
- Books: resources/books.md
- Videos: resources/videos.md
4 changes: 1 addition & 3 deletions pypolo/models/base_model.py
@@ -2,7 +2,6 @@
from typing import Tuple, Union

import numpy as np
from torch.utils.tensorboard.writer import SummaryWriter

from ..utils import torch_utils

@@ -27,8 +26,7 @@ def learn(self,
x_new: np.ndarray,
y_new: np.ndarray,
num_iter: int,
verbose: bool = True,
writer: Union[SummaryWriter, None] = None) -> None:
verbose: bool = True) -> None:
r"""Optimizes the model parameters.
Args:
56 changes: 13 additions & 43 deletions pypolo/models/gpr_model.py
@@ -4,7 +4,6 @@
import torch
from torch import nn
from torch.nn import Parameter
from torch.utils.tensorboard.writer import SummaryWriter
from tqdm import tqdm

from ..utils import torch_utils
@@ -14,23 +13,19 @@

class GPRModel(BaseModel, nn.Module):

def __init__(
self,
device_name,
kernel: BaseKernel,
noise: float,
lr_hyper: float = 0.01,
lr_nn: float = 0.001,
jitter: float = 1e-6,
) -> None:
def __init__(self,
device_name,
kernel: BaseKernel,
noise: float,
lr_hyper: float = 0.01,
jitter: float = 1e-6) -> None:
r"""Gaussian Process Regression.
Args:
device_name (str): The name of the device to run the model.
kernel (BaseKernel): The kernel function.
noise (float): The noise variance of the Gaussian likelihood.
lr_hyper (float, optional): Learning rate of hyper-parameters.
lr_nn (float, optional): Learning rate of network parameters.
jitter (float, optional): The jitter to add to the diagonal of the
covariance matrix. Defaults to 1e-6.
@@ -52,15 +47,14 @@ def __init__(
dtype=self.dtype,
device=self.device,
)))
self._init_optimizers(lr_hyper, lr_nn)
self._init_optimizers(lr_hyper)
self.jitter = jitter

def learn(self,
x_new: np.ndarray,
y_new: np.ndarray,
num_iter: int,
verbose: bool = True,
writer: Union[SummaryWriter, None] = None) -> None:
verbose: bool = True) -> None:
r"""Optimizes the model parameters.
Args:
@@ -78,20 +72,11 @@ def learn(self,
progress_bar = tqdm(range(num_iter), disable=not verbose)
for i in progress_bar:
self.opt_hyper.zero_grad()
if self.opt_nn is not None:
self.opt_nn.zero_grad()
loss = self._compute_loss()
loss.backward()
self.opt_hyper.step()
if self.opt_nn is not None:
self.opt_nn.step()
progress_bar.set_description(
f"Iter: {i:02d} loss: {loss.item(): .2f}")
if writer is not None:
writer.add_scalar('loss', loss.item(), i)
for name, param in self.named_parameters():
if param.grad is not None:
writer.add_histogram(name, param.grad, i)
self.eval()

def predict(self, x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
@@ -293,31 +278,16 @@ def _compute_common(self):
iK_y = torch.cholesky_solve(self.y_train, L, upper=False)
return L, iK_y

def _init_optimizers(self, lr_hyper: float, lr_nn: float) -> None:
"""Initialize optimizers for hyper-parameters and, optinally,
neural network parameters in non-stationary kernels.
def _init_optimizers(self, lr_hyper: float) -> None:
"""Initialize optimizers for hyper-parameters.
Args:
lr_hyper (float, optional): Learning rate of hyper-parameters.
Defaults to 0.01.
lr_nn (float, optional): Learning rate of neural network parameters
in non-stationary kernels. Defaults to 0.001.
!!! note "Neural Network Parameters"
Neural network parameters are found by searching for the string
"nn" in the parameter name.
"""
self.lr_hyper, self.lr_nn = lr_hyper, lr_nn
hyper_params, nn_params = [], []
self.lr_hyper = lr_hyper
hyper_params = []
for name, param in self.named_parameters():
if "nn" in name:
nn_params.append(param)
else:
hyper_params.append(param)
hyper_params.append(param)
self.opt_hyper = torch.optim.Adam(hyper_params, lr=lr_hyper)
if nn_params:
self.opt_nn = torch.optim.Adam(nn_params, lr=lr_nn)
else:
self.opt_nn = None

