In [1]:
%matplotlib inline

import sys
import numpy as np
import pandas as pd
import scipy
import sklearn
import sklearn.datasets
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from functools import partial

from bokeh.plotting import figure, show
from bokeh.models import BoxAnnotation, Span, Label, Legend, Title
from bokeh.io import output_notebook
from bokeh.palettes import brewer
# Route bokeh output to the notebook.
output_notebook()

# Short aliases for the TensorFlow Probability sub-modules used below.
tfd = tfp.distributions
psd_kernels = tfp.positive_semidefinite_kernels

# Plot styling and seeding of NumPy's global random number generator.
sns.set_style('darkgrid')
np.random.seed(42)

# Print versions used
print('Python: {}.{}.{}'.format(*sys.version_info[:3]))
for package_name, package in [
        ('Numpy', np),
        ('Pandas', pd),
        ('Tensorflow', tf),
        ('Tensorflow Probability', tfp),
        ('sklearn', sklearn),
        ('matplotlib', matplotlib),
        ('seaborn', sns)]:
    print('{}: {}'.format(package_name, package.__version__))
#

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


Python: 3.6.6
Numpy: 1.15.1
Pandas: 0.23.4
Tensorflow: 1.11.0
Tensorflow Probability: 0.4.0
sklearn: 0.19.2
matplotlib: 2.2.3
seaborn: 0.9.0


- https://www.tensorflow.org/probability/api_docs/python/tfp/distributions/GaussianProcessRegressionModel
- https://www.tensorflow.org/probability/api_docs/python/tfp/positive_semidefinite_kernels
- https://github.com/tensorflow/probability/tree/master/tensorflow_probability/python/positive_semidefinite_kernels

In [2]:
# Load the two columns selected by `usecols` from the CSV file, reading
# '-99.99' as a missing value, then clean the frame in a single chain.
co2_df = (
    pd.read_csv(
        'monthly_in_situ_co2_mlo.csv',
        header=54,
        skiprows=[55, 56],
        usecols=[3, 4],
        na_values='-99.99')
    # Drop missing values
    .dropna()
    # Remove whitespace from column names
    .rename(columns=lambda column_name: column_name.strip())
)

In [3]:
# co2_df.columns

In [4]:
# co2_df.head()

In [5]:
# co2_df.tail()

In [6]:
# co2_df.describe()

In [7]:
# Split the series by date: rows with `Date` before `date_split` form the
# training set, rows with `Date` at or after it form the test set.
date_split = 2004

df_train = co2_df[co2_df.Date < date_split]
X_train, y_train = df_train.Date, df_train.CO2
print('{} measurements in the training set'.format(len(df_train)))

df_test = co2_df[co2_df.Date >= date_split]
X_test, y_test = df_test.Date, df_test.CO2
print('{} measurements in the test set'.format(len(df_test)))

# Every row must land in exactly one of the two subsets.
assert len(df_train) + len(df_test) == len(co2_df), (
    'train/test split does not cover all rows of co2_df')

545 measurments in the training set
176 measurments in the test set


In [8]:
# Report the shapes of the training inputs and targets.
for label, series in (('X_train: ', X_train), ('y_train: ', y_train)):
    print(label, series.shape)

X_train:  (545,)
y_train:  (545,)


In [9]:
# Plot the CO2 series, with the training and test portions as separate lines.
subtitle = Title(
    text=(
        'In situ air measurements at Mauna Loa, Observatory, Hawaii: '
        'Latitude 19.5°N Longitude 155.6°W Elevation 3397m'),
    text_font_style="italic")
main_title = Title(
    text='Atmospheric CO2 concentrations (ppm)', text_font_size="16pt")

p = figure(width=600, height=400)
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'CO2 [ppm]'
# Both titles are placed above the plot area, subtitle first.
p.add_layout(subtitle, 'above')
p.add_layout(main_title, 'above')

# One line per subset: (x values, y values, legend label, colour).
for xs, ys, label, color in (
        (X_train, y_train, 'train', 'darkblue'),
        (X_test, y_test, 'test', 'firebrick')):
    p.line(xs, ys, legend=label, line_width=2, line_color=color)

p.legend.location = 'top_left'
show(p)

In [10]:
# from linear import Linear
from dot_product import DotProd
from rational_quadratic import RationalQuadratic

# # Linear kernel
# amplitude_linear = tf.math.softplus(tf.Variable(np.float64(0)), name='amplitude_linear')
# bias_linear = tf.math.softplus(tf.Variable(np.float64(0)), name='bias_linear')
# origin_linear = tf.Variable(np.float64(0), name='origin_linear')
# kernel_linear = Linear(
#     amplitude=amplitude_linear,
#     bias=bias_linear,
#     origin=origin_linear)
# kernel_linear_batch_shape = kernel_linear.batch_shape_tensor()
# x = np.ones([5, 3], np.float32)
# y = np.ones([4, 3], np.float32)
# print('kernel_linear_batch_shape: ', kernel_linear_batch_shape)

In [11]:
def _positive_parameter(name, initial_value=0):
    """Create a trainable float64 scalar and return its exponential.

    Taking `tf.exp` of an unconstrained `tf.Variable` applies a positivity
    constraint to the resulting tensor.

    Args:
        name: Name given to the `tf.exp` op (the variable itself gets
            TensorFlow's default name).
        initial_value: Initial value of the underlying variable, i.e. the
            log of the parameter's starting value.

    Returns:
        The output tensor of `tf.exp`.
    """
    return tf.exp(tf.Variable(np.float64(initial_value)), name=name)


# Optimize model parameters via maximum marginal likelihood
# A trailing axis is added so the date arrays have shape (N, 1).
date_obeserved = df_train.Date.values[:, np.newaxis]
print('date_obeserved: ', date_obeserved.shape)
co2_observed = df_train.CO2.values
print('co2_observed: ', co2_observed.shape)
date_predict = df_test.Date.values[:, np.newaxis]
print('date_predict: ', date_predict.shape)

# Define a kernel with trainable parameters. Every parameter is created
# through `_positive_parameter`, which applies the positivity transform.

# Smooth kernel
amplitude_smooth = _positive_parameter('amplitude_smooth')
length_scale_smooth = _positive_parameter('length_scale_smooth')
kernel_smooth = psd_kernels.ExponentiatedQuadratic(
    amplitude=amplitude_smooth,
    length_scale=length_scale_smooth)

# Periodic kernel
amplitude_periodic = _positive_parameter('amplitude_periodic')
length_scale_periodic = _positive_parameter('length_scale_periodic')
period_periodic = _positive_parameter('period_periodic')
# These two tensors belong to an optional ExponentiatedQuadratic factor for
# the periodic kernel; that product is currently disabled, so they are
# created but not used by `kernel_periodic`.
amplitude_periodic_smooth = _positive_parameter('amplitude_periodic_smooth')
length_scale_periodic_smooth = _positive_parameter(
    'length_scale_periodic_smooth')
kernel_periodic = psd_kernels.ExpSinSquared(
    amplitude=amplitude_periodic,
    length_scale=length_scale_periodic,
    period=period_periodic)

# Linear kernel: a dot-product kernel multiplied by an exponentiated
# quadratic kernel. `power_linear` is created here but not passed to either
# factor.
amplitude_linear = _positive_parameter('amplitude_linear')
bias_linear = _positive_parameter('bias_linear')
power_linear = _positive_parameter('power_linear')
amplitude_linear_smooth = _positive_parameter('amplitude_linear_smooth')
length_scale_linear_smooth = _positive_parameter(
    'length_scale_linear_smooth', initial_value=5)
kernel_dotprod = (
    DotProd(amplitude=amplitude_linear, bias=bias_linear)
    * psd_kernels.ExponentiatedQuadratic(
        amplitude=amplitude_linear_smooth,
        length_scale=length_scale_linear_smooth)
)

# Rational quadratic
amplitude_irregular = _positive_parameter('amplitude_irregular')
length_scale_irregular = _positive_parameter('length_scale_irregular')
scale_mixture_irregular = _positive_parameter('scale_mixture_irregular')
kernel_irregular = RationalQuadratic(
    amplitude=amplitude_irregular,
    length_scale=length_scale_irregular,
    scale_mixture=scale_mixture_irregular)

# The full kernel is the sum of the four components.
kernel = kernel_smooth + kernel_periodic + kernel_dotprod + kernel_irregular

observation_noise_variance = _positive_parameter('observation_noise_variance')

# We'll use an unconditioned GP to train the kernel parameters: the loss is
# the negative log-probability of the observed CO2 values under the GP.
gp = tfd.GaussianProcess(
    kernel=kernel,
    index_points=date_obeserved,
    observation_noise_variance=observation_noise_variance)
neg_log_likelihood = -gp.log_prob(co2_observed)

optimizer = tf.train.AdamOptimizer(learning_rate=0.002)
optimize = optimizer.minimize(neg_log_likelihood)

# The session is left open; later cells reuse it to evaluate tensors.
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())

# Run the optimizer, logging the loss every 100 steps.
for step in range(5000):
    _, neg_log_likelihood_ = session.run([optimize, neg_log_likelihood])
    if step % 100 == 0:
        print("Step {}: NLL = {}".format(step, neg_log_likelihood_))

print("Final NLL = {}".format(neg_log_likelihood_))

date_obeserved:  (545, 1)
co2_observed:  (545,)
date_predict:  (176, 1)
dtype:  <class 'numpy.float64'>
self.bias:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> ()
self.amplitude:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> ()

DotProd._apply
x1:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (545, 1, 1)
x2:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 545, 1)
x2.T:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 545, 1)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (545, 545)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (545, 545)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (545, 545)
Step 0: NLL = 630.8566987548214
Step 100: NLL = 571.0031897103476
Step 200: NLL = 519.1829201320355
Step 300: NLL = 473.19687586107295
Step 400: NLL = 431.8243585761917
Step 500: NLL

In [12]:
# Build the posterior predictive distribution over every date in `co2_df`
# (training and test rows), conditioned on the training observations. It
# reuses the kernel and noise tensors whose variables were optimized above.
gprm = tfd.GaussianProcessRegressionModel(
    kernel=kernel,
    index_points=np.expand_dims(co2_df.Date.values, 1),
    observation_index_points=date_obeserved,
    observations=co2_observed,
    observation_noise_variance=observation_noise_variance)

# Graph tensors for 10 posterior samples and for the posterior mean.
samples = gprm.sample(10)
mean = gprm.mean()

# Fetch both tensors in a single `session.run` call so that computation
# shared by the two is evaluated once instead of once per call.
samples_, mean_ = session.run([samples, mean])
print('samples_: ', samples_.shape)
print('mean_: ', mean_.shape)


DotProd._apply
x1:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 1, 1)
x2:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 721, 1)
x2.T:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 721, 1)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 721)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 721)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 721)

DotProd._apply
x1:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 1, 1)
x2:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 545, 1)
x2.T:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (1, 545, 1)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 545)
dot_prod:  <class 'tensorflow.python.framework.ops.Tensor'> <dtype: 'float64'> (721, 545)
dot_

In [13]:
# Hyperparameter tensors to report.
# NOTE(review): amplitude_smooth, length_scale_smooth and amplitude_irregular
# are not listed, while power_linear is listed although it is not passed to
# any kernel in this notebook — confirm whether that is intended.
variables = [
    # periodic kernel
    amplitude_periodic,
    length_scale_periodic,
    period_periodic,
    # linear (dot-product) kernel and its smoothing factor
    amplitude_linear,
    bias_linear,
    power_linear,
    amplitude_linear_smooth,
    length_scale_linear_smooth,
    # rational quadratic kernel
    length_scale_irregular,
    scale_mixture_irregular,
    # observation noise
    observation_noise_variance,
]

# Evaluate all tensors in one run, then print each tensor's name and value.
variables_eval = session.run(variables)
for position, tensor in enumerate(variables):
    print(tensor.name, variables_eval[position])

amplitude_periodic:0 3.034534535772797
length_scale_periodic:0 1.4394902174247306
period_periodic:0 0.9999906805150176
amplitude_linear:0 0.2139520221204997
bias_linear:0 0.36411537591726084
power_linear:0 1.0
amplitude_linear_smooth:0 0.2139519949139321
length_scale_linear_smooth:0 78.93428685374977
length_scale_irregular:0 0.4542332204105937
scale_mixture_irregular:0 0.030582754206149226
observation_noise_variance:0 0.03135804959292352


In [16]:
# Plot the GP posterior mean together with the observed (training) data and
# the held-out test data.
p = figure(width=600, height=400)
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'CO2 [ppm]'
p.add_layout(Title(text=(
    'In situ air measurements at Mauna Loa, Observatory, Hawaii: '
    'Latitude 19.5°N Longitude 155.6°W Elevation 3397m'),
                   text_font_style="italic"), 'above')
p.add_layout(Title(
    text='Atmospheric CO2 concentrations (ppm)', text_font_size="16pt"),
             'above')
# Training observations the GP was conditioned on.
p.line(
    np.squeeze(date_obeserved), co2_observed, legend='observed',
    line_width=2, line_color='darkblue')
# Posterior mean over all dates. It has its own legend label: it previously
# shared the 'test' label with the held-out data, so the legend could not
# distinguish the prediction from the measurements.
p.line(
    co2_df.Date.values, mean_, legend='predicted mean',
    line_width=2, line_color='firebrick')
# Held-out measurements for comparison with the prediction.
p.line(
    X_test, y_test, legend='test',
    line_width=2, line_color='blue')
p.legend.location = 'top_left'
show(p)

In [15]:
# session.close()

To read:
- http://130.243.105.49/Research/Learning/courses/ml/2011/lectures/ML_2011_L05.pdf 
- https://www.inf.ed.ac.uk/teaching/courses/mlpr/2016/notes/w7c_gaussian_process_kernels.pdf
- https://george.readthedocs.io/en/latest/user/kernels/
- http://ml.dcs.shef.ac.uk/gpss/gpws14/KernelDesign.pdf
- https://stats.stackexchange.com/a/249169/26888


- https://stats.stackexchange.com/questions/222238/why-is-the-mean-function-in-gaussian-process-uninteresting
- https://stats.stackexchange.com/questions/219579/what-is-wrong-with-extrapolation