In [117]:
%load_ext autoreload
%autoreload 2

import tensorflow as tf

from lqmc.copula import GaussianCopula, GaussianCopulaAntiparallelCorrelated
from lqmc.gp import RandomFeatureGaussianProcess
from lqmc.kernels import ExponentiatedQuadraticKernel
from lqmc.random import Seed
from lqmc.utils import to_tensor
from data.datasets import Wine


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [119]:
# Instantiate the Wine dataset with a fixed seed, selecting split_id=0 with
# num_splits=10, and float64 as the dtype.
dataset = Wine(seed=[0, 0], split_id=0, num_splits=10, dtype=tf.float64)

In [120]:
# Joint sampler over `dataset.dim` dimensions, constructed with a
# correlation_factor of -0.99 and float64 as the dtype.
joint_sampler = GaussianCopulaAntiparallelCorrelated(
    dim=dataset.dim, correlation_factor=-0.99, dtype=tf.float64,
)

In [121]:
# @tf.function is currently disabled for this step (decorator commented out).
def joint_sampler_gradient_step(
    seed: Seed,
    joint_sampler: GaussianCopula,
    kernel: ExponentiatedQuadraticKernel,
    x_train: tf.Tensor,
    optimizer: tf.keras.optimizers.Adam,
    num_ensembles: int = 1,
):
    """Take one optimiser step on the joint sampler's trainable variables.

    A sample ``omega`` is drawn from ``joint_sampler`` and scored with
    ``kernel.rmse_loss`` evaluated on ``x_train`` against itself. The gradient
    of that loss with respect to ``joint_sampler.trainable_variables`` is then
    applied through ``optimizer``.

    Args:
        seed: Random seed threaded through the sampler and then the loss.
        joint_sampler: Sampler, called as ``joint_sampler(seed=..., batch_size=...)``.
        kernel: Kernel object providing ``rmse_loss``.
        x_train: Inputs passed as both ``x1`` and ``x2`` to the loss.
        optimizer: Optimiser whose ``apply_gradients`` performs the update.
        num_ensembles: Used as the sampler batch size and forwarded to the loss.

    Returns:
        Tuple ``(seed, loss)``: the seed returned by ``rmse_loss`` and the loss.
    """
    # Both the sampling and the loss evaluation are recorded on the tape.
    with tf.GradientTape() as grad_tape:
        seed, omega = joint_sampler(seed=seed, batch_size=num_ensembles)
        seed, loss = kernel.rmse_loss(
            seed=seed,
            omega=omega,
            x1=x_train,
            x2=x_train,
            num_ensembles=num_ensembles,
        )

    sampler_vars = joint_sampler.trainable_variables
    grads = grad_tape.gradient(loss, sampler_vars)
    optimizer.apply_gradients(zip(grads, sampler_vars))
    return seed, loss

In [138]:
# Run 1000 gradient steps on the joint sampler against a kernel with unit
# lengthscales and unit output scale, printing the loss every 100 steps.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
kernel = ExponentiatedQuadraticKernel(
    dim=int(dataset.dim), lengthscales=dataset.dim*[1.0], output_scale=1.0, dtype=tf.float64,
)
seed = [0, 0]

for i in range(1000):
    # The seed returned by each step is fed into the next one.
    seed, loss = joint_sampler_gradient_step(
        seed=seed, joint_sampler=joint_sampler, kernel=kernel,
        x_train=dataset.x_train, optimizer=optimizer, num_ensembles=10,
    )

    # Only report on steps that are a multiple of 100.
    if i % 100:
        continue
    print(f"loss: {loss}")

loss: 0.05196146511923714
loss: 0.050923672377058186
loss: 0.05194891517125179


KeyboardInterrupt: 

In [139]:
@tf.function
def rfgp_gradient_step(
    seed: Seed,
    rfgp: RandomFeatureGaussianProcess,
    optimizer: tf.keras.optimizers.Adam,
    num_ensembles: int = 1,
):
    """Take one optimiser step on the trainable variables of ``rfgp``.

    Evaluates ``rfgp.loss`` under a gradient tape, differentiates it with
    respect to ``rfgp.trainable_variables`` and applies the resulting
    gradients through ``optimizer``.

    Args:
        seed: Random seed passed to ``rfgp.loss``.
        rfgp: Model providing ``loss`` and ``trainable_variables``.
        optimizer: Optimiser whose ``apply_gradients`` performs the update.
        num_ensembles: Forwarded to ``rfgp.loss``.

    Returns:
        Tuple ``(seed, loss)`` as returned by ``rfgp.loss``.
    """
    with tf.GradientTape() as grad_tape:
        seed, loss = rfgp.loss(seed=seed, num_ensembles=num_ensembles)

    model_vars = rfgp.trainable_variables
    grads = grad_tape.gradient(loss, model_vars)
    optimizer.apply_gradients(zip(grads, model_vars))
    return seed, loss

# Seed threaded through the training loop below; each step returns the next one.
seed = [0, 0]

# Fresh kernel with every lengthscale initialised to 6.5 and unit output scale.
kernel = ExponentiatedQuadraticKernel(
    dim=int(dataset.dim), lengthscales=dataset.dim*[6.5], output_scale=1.0, dtype=tf.float64,
)

# Random-feature GP on the training inputs and the first column of the
# training targets, using the joint sampler built earlier in the notebook.
rfgp = RandomFeatureGaussianProcess(
    kernel=kernel,
    noise_std=1.0,
    x=dataset.x_train,
    y=dataset.y_train[:, 0],
    joint_sampler=joint_sampler,
    dtype=tf.float64,
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

for i in range(10000):
    seed, loss = rfgp_gradient_step(seed=seed, rfgp=rfgp, optimizer=optimizer, num_ensembles=1)

    # Every 1000 steps print, in order: loss, noise std, output scale, lengthscales.
    if i % 1000:
        continue
    print(f"loss: {loss:.3f} {rfgp.noise_std.numpy():.3f} {rfgp.kernel.output_scale.numpy():.3f} {rfgp.kernel.lengthscales.numpy()}")

loss: 1.074 0.990 1.010 [6.5651002  6.56528626 6.56528568 6.43547371 6.5652541  6.43547634
 6.43533742 6.56528374 6.5652846  6.56441641 6.56522429 6.43534251
 6.4353541 ]
loss: 0.215 0.226 0.914 [  2.29089468 145.76919545  16.87732873   7.57717587 148.81815496
  83.68998196   1.47803022  10.29458274 131.73558514   3.23631223
  49.91876232   7.63843801   3.11762401]
loss: 0.120 0.223 0.926 [  2.16448475 242.62322676  17.45124547   7.30321485 245.71342163
 134.61630539   1.38786265  12.65371679 251.2093064    3.49851768
 108.18597506   7.84017361   3.10764322]
loss: 0.107 0.223 0.893 [  2.21598068 336.73693732  17.89371505   7.18929117 385.68001594
 234.68663671   1.36389674  10.63192441 376.01891942   3.56922774
 148.86889701   7.88142331   2.72672067]
loss: 0.141 0.219 0.939 [  2.25966302 495.63506621  17.49184446   7.79419685 543.4616925
 311.5839817    1.31945686  12.05732798 453.3336351    3.84790066
 209.56972999   5.96309791   3.03162152]
loss: 0.142 0.222 0.943 [  2.03992943 632.

In [140]:
# Display the current noise standard deviation, kernel output scale and
# kernel lengthscales as a tuple.
(
    rfgp.noise_std,
    kernel.output_scale,
    kernel.lengthscales,
)

(<tf.Tensor: shape=(), dtype=float64, numpy=0.22431304140352226>,
 <tf.Tensor: shape=(), dtype=float64, numpy=0.912676400995032>,
 <tf.Tensor: shape=(13,), dtype=float64, numpy=
 array([2.10475053e+00, 2.06796190e+03, 1.59273085e+01, 7.64344225e+00,
        1.41594364e+03, 1.52012934e+03, 1.42611061e+00, 1.04353950e+01,
        1.37892532e+03, 3.57385997e+00, 5.90686387e+02, 7.14603587e+00,
        3.05445804e+00])>)

In [141]:
# (mean, standard deviation) over all entries of the training targets.
(
    tf.reduce_mean(dataset.y_train),
    tf.math.reduce_std(dataset.y_train),
)

(<tf.Tensor: shape=(), dtype=float64, numpy=3.8616453030440226e-17>,
 <tf.Tensor: shape=(), dtype=float64, numpy=1.0>)

In [142]:
# (mean, standard deviation) over all entries of the test targets.
(
    tf.reduce_mean(dataset.y_test),
    tf.math.reduce_std(dataset.y_test),
)

(<tf.Tensor: shape=(), dtype=float64, numpy=0.08836552891871442>,
 <tf.Tensor: shape=(), dtype=float64, numpy=0.9919541791966401>)

In [143]:
# Predict at the test inputs with 10 ensembles and noiseless=False.
# The first return value is discarded (presumably the updated seed -- confirm
# against RandomFeatureGaussianProcess.__call__).
_, pred_mean, pred_cov = rfgp(
    seed=seed,
    x_pred=dataset.x_test,
    num_ensembles=10,
    noiseless=False,
)

In [144]:
# Root-mean-squared error of the predictive mean against the first column
# of the test targets.
residual = pred_mean - dataset.y_test[:, 0]
mean_squared_error = tf.reduce_mean(residual**2)
rmse = tf.math.sqrt(mean_squared_error)
print(rmse)

tf.Tensor(0.21072946557225744, shape=(), dtype=float64)


In [145]:
# Compare the shape of the predictive mean with that of the test targets.
(
    pred_mean.shape,
    dataset.y_test.shape,
)

(TensorShape([17]), TensorShape([17, 1]))