# Function imports

In [1]:
%load_ext autoreload
%autoreload 2

import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import warnings
warnings.filterwarnings("ignore")


import matplotlib
matplotlib.use('nbagg')
from models import GaussianProcess, LookaheadGP
from kernels import IsoSQEKernel, PeriodicKernel, QuadraticKernel

# Data provided 

1. Update Date and Time (ISO)
2. Update Duration (ms)
3. Reading Date and Time (ISO)
4. Air pressure (mb)
5. Air temperature (C) – A variable of interest.
6. Tide height (m) – Another variable of interest.
7. Wind direction (deg)
8. Wind gust speed (kn)
9. Wind speed (kn)
10. True air temperature (C) – Ground truth air temperature, against which you should compare your predictions.
11. True tide height (m) – Ground truth tide height, against which you should compare your predictions.
12. Independent tide height prediction (m) – These are some GP predictions prepared earlier for you to compare against, if you so choose.
13. Independent tide height deviation (m) – The standard deviation of the GP predictions above.
14. Dependent tide height prediction (m) – Another GP prediction built using three additional sensors not provided to you.
15. Dependent tide height deviation (m) – The standard deviation of the GP predictions above.
16. Independent air temperature prediction (C) – These are some GP predictions prepared earlier for you to compare against, if you so choose.
17. Independent air temperature deviation (C) – The standard deviation of the GP predictions above.
18. Dependent air temperature prediction (C) – Another GP prediction built using three additional sensors not provided to you.
19. Dependent air temperature deviation (C) – The standard deviation of the GP predictions above.

# Loading and pre-processing weather data

In [2]:
from data import *

# Variables for prediction

In [3]:
target_cols = [4, 5]
fig, ax = plt.subplots(len(target_cols), 1, figsize=(8,5))
column_plotter(weather_data, target_cols, ax)
ax[1].plot(all_reading_times, true_tide_heights)
plt.show()

<IPython.core.display.Javascript object>

Remarks
- Clear periodic trend in tide (~12.5hr period)
- Amplitude varies fairly smoothly over about a day
- Distinct bumps at high tide and low-mid tide

# Fitting Gaussian Process models

In [4]:
import torch
import numpy as np
# Fixing seeds for re-producibility
torch.random.manual_seed(1)
np.random.seed(1)
from evaluation import gp_inference  # Function for optimising, plotting and evaluating GP metrics

## 1. Isotropic exponentiated quadratic (IsoSQE) GP model

In [14]:
period = 60*12 + 25  # Exact tidal period about 12h and 25 mins
iso_params = torch.tensor(np.log([period/10, 2]), requires_grad=True)
iso = IsoSQEKernel(iso_params)
optimised_iso_gp = gp_inference(iso, 50, sigma_n=sigma_n, lr=1e-3, jitter=1e-2)

Prior to optimisation:



  0%|          | 0/50 [00:00<?, ?it/s]

Marginal LL : tensor([[1272.8962]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[4.0014]], dtype=torch.float64)
Test data rmse : 0.1627786920680468
Old hyper-parameters: tensor([74.5000,  2.0000], dtype=torch.float64, grad_fn=<ExpBackward>)


100%|██████████| 50/50 [00:14<00:00,  3.46it/s]


New hyper-parameters: tensor([78.2831,  1.9034], dtype=torch.float64, grad_fn=<ExpBackward>)
Negative log-likelihood : tensor([[-1287.9390]], dtype=torch.float64, grad_fn=<MulBackward0>)
After optimisation:

Marginal LL : tensor([[1288.2290]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[4.0037]], dtype=torch.float64)
Test data rmse : 0.15648118486805931


<IPython.core.display.Javascript object>

## 2. Periodic GP model

In [6]:
period = 60*12 + 25  # Exact tidal period about 12h and 25 mins (deliberately over-estimating)

periodic_params = torch.tensor(np.log([10, 2, period]), requires_grad=True)
periodic = PeriodicKernel(periodic_params)

gp_inference(periodic, 50, sigma_n=sigma_n, lr=1e-3, jitter=1e-2)

Prior to optimisation:



  0%|          | 0/50 [00:00<?, ?it/s]

Marginal LL : tensor([[116.3707]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[-243.7621]], dtype=torch.float64)
Test data rmse : 0.25228423233699565
Old hyper-parameters: tensor([ 10.0000,   2.0000, 745.0000], dtype=torch.float64,
       grad_fn=<ExpBackward>)


100%|██████████| 50/50 [00:14<00:00,  3.40it/s]


New hyper-parameters: tensor([  9.5166,   2.1015, 742.3979], dtype=torch.float64,
       grad_fn=<ExpBackward>)
Negative log-likelihood : tensor([[-225.5840]], dtype=torch.float64, grad_fn=<MulBackward0>)
After optimisation:

Marginal LL : tensor([[226.6855]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[-204.0772]], dtype=torch.float64)
Test data rmse : 0.23980227838298185


<IPython.core.display.Javascript object>

<models.GaussianProcess at 0x7feb4560d0d0>

## 3. Composite GP model (periodic + periodic * RQ covariance)

In [7]:
period = 12*60 + 25

periodic_params = torch.tensor(np.log([0.5, 0.5, period]), requires_grad=True)
periodic_kernel = PeriodicKernel(periodic_params)

quadratic_params = torch.tensor(np.log([period/2, .5, 1]), requires_grad=True)
rq_kernel = QuadraticKernel(quadratic_params, 2)

combined_kernel = rq_kernel*periodic_kernel + periodic_kernel

optimised_gp = gp_inference(combined_kernel, 50, sigma_n=sigma_n, jitter=1e-2, lr=1e-3)

Prior to optimisation:

Marginal LL : tensor([[1402.8726]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[4.0214]], dtype=torch.float64)
Test data rmse : 0.04518835047879723
Old hyper-parameters: tensor([372.5000,   0.5000,   1.0000], dtype=torch.float64,
       grad_fn=<ExpBackward>)tensor([5.0000e-01, 5.0000e-01, 7.4500e+02], dtype=torch.float64,
       grad_fn=<ExpBackward>)tensor([5.0000e-01, 5.0000e-01, 7.4500e+02], dtype=torch.float64,
       grad_fn=<ExpBackward>)


100%|██████████| 50/50 [00:18<00:00,  2.73it/s]


New hyper-parameters: tensor([390.2366,   0.4771,   0.9533], dtype=torch.float64,
       grad_fn=<ExpBackward>)tensor([5.5067e-01, 4.6065e-01, 7.4187e+02], dtype=torch.float64,
       grad_fn=<ExpBackward>)tensor([5.5067e-01, 4.6065e-01, 7.4187e+02], dtype=torch.float64,
       grad_fn=<ExpBackward>)
Negative log-likelihood : tensor([[-1418.4803]], dtype=torch.float64, grad_fn=<MulBackward0>)
After optimisation:

Marginal LL : tensor([[1418.6541]], dtype=torch.float64, grad_fn=<NegBackward>)
Test data LL : tensor([[4.0401]], dtype=torch.float64)
Test data rmse : 0.04250367628054395


<IPython.core.display.Javascript object>

# Lookahead inference using GPs

## Using Iso SQE Kernel

In [8]:
optimised_iso_kernel = optimised_iso_gp.covar_kernel

iso_lookahead_gp = LookaheadGP(covar_kernel=optimised_iso_kernel, sigma_n=sigma_n, training_data=scaled_reading_times, 
                          labels=scaled_tide_heights, learn_noise=False)

lookahead = 60  # 1 hour lookahead prediction (useful as a surfer!)
t_predictions = torch.linspace(lookahead, 7500, 100)

iso_lookahead_means, iso_lookahead_vars = iso_lookahead_gp.compute_lookahead_predictive_means_vars(t_predictions, lookahead)

In [9]:
plt.figure(figsize=(10,5))
plt.scatter(scaled_reading_times, scaled_tide_heights, s=5, marker="x", label="Recorded data", color="tab:red")
#plt.plot(sample_times, sample_means, color="tab:blue", label="GP mean"
plt.title(f"{lookahead} minute lookahead point predictive distributions")
plt.plot(t_predictions, iso_lookahead_means, label="Lookahead prediction")
sigma_vector = np.diag(iso_lookahead_vars)**0.5

alphas = [0.5, 0.3, 0.1]
for i in range(3):
    j = i + 1
    plt.fill_between(t_predictions, causal_means-j*sigma_vector, causal_means+j*sigma_vector,
                    alpha=alphas[i], color="tab:blue", label=fr"GP Uncertainty - ${i+1}\sigma$")


plt.ylabel("Tide height / m")
plt.xlabel("Timestamp / days")
plt.plot(scaled_all_reading_times, scaled_true_heights, label="Ground truth", color="green")
plt.legend(ncol=2)
plt.show()

NameError: name 'causal_means' is not defined

## Using optimised Periodic*(1+RQ) Kernel

In [10]:
optimised_kernel = optimised_gp.covar_kernel

combined_lookahead_gp = LookaheadGP(covar_kernel=optimised_kernel, sigma_n=sigma_n, training_data=scaled_reading_times, 
                          labels=scaled_tide_heights, learn_noise=False)



lookahead = 60
t_predictions = torch.linspace(lookahead, 7500, 200)
lookahead_means, lookahead_vars = combined_lookahead_gp.compute_lookahead_predictive_means_vars(t_predictions, lookahead)
lookahead_means, lookahead_vars = iht(lookahead_means), (tide_std**2)*lookahead_vars

In [13]:
plt.figure(figsize=(8,5))
plt.scatter(reading_times, tide_heights, s=5, marker="x", label="Training data", color="black")
#plt.plot(sample_times, sample_means, color="tab:blue", label="GP mean"

plt.title(f"{lookahead} minute lookahead predictive distribution")
plt.plot(itt(t_predictions), lookahead_means, label="GP predictive mean")
sigma_vector = np.diag(lookahead_vars)**0.5

alphas = [0.4, 0.3, 0.1]
for i in range(3):
    j = i + 1
    plt.fill_between(itt(t_predictions), lookahead_means-j*sigma_vector, lookahead_means+j*sigma_vector,
                    alpha=alphas[i], color="tab:blue", label=fr"GP Uncertainty - ${i+1}\sigma$")

plt.xticks(rotation=45, fontsize=12)
plt.yticks(fontsize=12)
plt.ylim(plt.ylim()[0]-2)
plt.ylabel("Tide height / m", fontsize=14)
plt.xlabel("Timestamp / days", fontsize=14)
plt.plot(all_reading_times, true_tide_heights, label="Ground truth", color="green")
plt.legend(fontsize=11, ncol=2)
plt.tight_layout()
#plt.savefig("lookahead")
plt.show()

<IPython.core.display.Javascript object>