## **Load in data**

In [20]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import pylab
from sklearn import preprocessing
import dateutil.parser as dp
from GP import * 



data_url = 'http://www.robots.ox.ac.uk/~mosb/teaching/AIMS_CDT/sotonmet.txt'
data = pd.read_csv(data_url)

print(data.columns)

# Time in days since first reading
t = data['Reading Date and Time (ISO)']
t = [int(dp.parse(x).strftime('%s')) for x in t]
t = [float(x - t[0])/86400 for x in t]

# Training and test data
y = data['Tide height (m)']
ytrue = data['True tide height (m)']

# Remove missing data points
data_train = [(t[i], y[i]) for i in range(len(t)) if not np.isnan(y[i])]
data_test = [(t[i], ytrue[i]) for i in range(len(t)) if not np.isnan(ytrue[i])]
t, y = [x[0] for x in data_train], [x[1] for x in data_train]
ttrue, ytrue = [x[0] for x in data_test], [x[1] for x in data_test]

# Normalise both datasetts
mean, stdev = np.mean(y), np.std(y)
y = [(yi - mean) / stdev for yi in y]
ytrue = [(yi - mean) / stdev for yi in ytrue]



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Index(['Update Date and Time (ISO)', 'Update Duration (ms)',
       'Reading Date and Time (ISO)', 'Air pressure (mb)',
       'Air temperature (C)', 'Tide height (m)', 'Wind direction (deg)',
       'Wind gust speed (kn)', 'Wind speed (kn)', 'True air temperature (C)',
       'True tide height (m)', 'Independent tide height prediction (m)',
       'Independent tide height deviation (m)',
       'Dependent tide height prediction (m)',
       'Dependent tide height deviation (m)',
       'Independent air temperature prediction (C)',
       'Independent air temperature deviation (C)',
       'Dependent air temperature prediction (C)',
       'Dependent air temperature deviation (C)'],
      dtype='object')


## **Initial Test**

Hyperparameters chosen manually to provide a reasonable fit to the data, using a summed RBF kernel and periodic kernel.

In [21]:
# Plot width and height in inches
width, height = 16, 8
pylab.rcParams['figure.figsize'] = width, height

# RBF kernel - sigma_f, L
kernel = Kernel()
kernel.addKernel("RBF", 1.0, 0.02)
# kernel.addKernel("RBF", 1, 0.05) # good

# Periodic kernel - sigma_f, L, p
kernel.addKernel("periodic", 1, 0.4, 0.5175)

# input data
# points = 5
X = t
Y = y
Xs = np.linspace(-1, 6, 500)
jitter = 0.1

# mean and covariance functions
mu = np.zeros(len(X))
K = kernel(X, X)

# Points of interest
mu, sigma = GetPosteriorPredictive(X, Y, Xs, kernel, jitter)

# Uncertainties (sigma and 2sigma)
y1 = np.array([mu[i] + 2 * sigma[i][i] for i in range(len(Xs))])
y2 = np.array([mu[i] - 2 * sigma[i][i] for i in range(len(Xs))])
pylab.fill_between(Xs, y1, y2, linewidth=0, color='blue', alpha=0.1, label='2 SD')
y1 = np.array([mu[i] + 1 * sigma[i][i] for i in range(len(Xs))])
y2 = np.array([mu[i] - 1 * sigma[i][i] for i in range(len(Xs))])
pylab.fill_between(Xs, y1, y2, linewidth=0, color='blue', alpha=0.2, label='1 SD')

# measurements and truth data
pylab.plot(t, y, '.r', label='training data')
pylab.plot(ttrue, ytrue, '.b', label='test data')

# mean function
pylab.plot(Xs, mu, '-r', alpha=0.5, label='mean function')

# function draws
draws = 0
for i in range(draws):
    fs = GetFunctionSample(mu, sigma)
    pylab.plot(Xs, fs, color='green', alpha=0.3, label='function draws')

pylab.legend(loc='upper right')
pylab.xlabel('time (days)')
pylab.ylabel('tide height (normalised)')
pylab.ylim((-5, 5))
# pylab.xlim((3,4))

pylab.show()

AttributeError: 'list' object has no attribute 'T'