In [1]:
# Test out some data
# Notebook setup: plotly is used for all figures below.
import plotly
from plotly.offline import init_notebook_mode
# Enable plotly's offline notebook rendering before any figure is drawn.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of
# embedding it in the notebook, so figures need network access — confirm.
init_notebook_mode(connected = True)

import plotly.express as px
# Project-local loader; called below with a dataset id string.
from src.data.load import get_data


# Load the dataset and plot the raw observations.
data_id = "solar"

# reset_index() (without inplace=True) returns a new frame, so the object handed
# back by get_data() is never mutated. That keeps this cell safe to re-run even
# if the loader returns a shared or cached frame.
# assumes get_data() returns a pandas DataFrame with "x" and "y" available after
# the reset — TODO confirm against src.data.load
df = get_data(data_id).reset_index()

# Scatter of the observations; `fig` is reused by a later cell to overlay predictions.
fig = px.scatter(data_frame=df, x="x", y="y")
plotly.offline.iplot(fig)

In [2]:
import tensorflow as tf
from src.models.gp import GP


# Hide every GPU from TensorFlow so the run stays on the CPU (My M1 acting up).
tf.config.set_visible_devices([], "GPU")

# Inputs and targets as column vectors: reshape(-1, 1) gives shape (n, 1).
x = df["x"].values.reshape(-1, 1)
y = df["y"].values.reshape(-1, 1)

# Fit with steps=0 and predict back at the training inputs.
# NOTE(review): steps=0 presumably skips hyperparameter optimisation — confirm in src.models.gp
gp = GP()
gp.fit(x, y, steps=0)
preds = gp.predict(x)

2022-11-02 11:40:17.349924: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-02 11:40:19.861140: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import plotly.graph_objects as go

# Overlay the GP predictions as a line on the scatter figure created in the first cell.
prediction_trace = go.Scatter(
    x=x.flatten(),
    y=preds.flatten(),
    mode="lines",
    name="predictions",
)
fig.add_traces(prediction_trace)

In [4]:
from sklearn.metrics import mean_squared_error

# scikit-learn's signature is mean_squared_error(y_true, y_pred), so the observed
# targets go first and the model predictions second.
# Note: the value printed is the mean *squared* error between predictions and targets.
mse = mean_squared_error(df["y"], preds)
print(f"Mean error: {mse}")

Mean error: 5.3905632082979955e-24


In [5]:
# Let's add confidence intervals.
# Query the fitted GP at the training inputs. `m` is later plotted as the mean
# predictions and the diagonal of `cov` is later used as the per-point variances.
# NOTE(review): presumably `m` is the conditional mean and `cov` the full (n, n)
# conditional covariance — confirm in src.models.gp

m, cov = gp.conditional_distribution_at(x)

In [6]:
import numpy as np
# Per-point variances are the diagonal of the covariance matrix.
variances = np.diagonal(cov)
# A variance cannot be negative; any negative entries are small rounding errors,
# so clamp them to zero. (The previous sqrt(v**2) trick mirrored them to positive
# values instead, and could underflow for very small magnitudes.)
variances = np.clip(variances, 0.0, None)


In [13]:
def plot_mean_preds_with_ci(x_seen, y_seen, means, variances_, ci_in_sd: float = 2.) -> None:
    """Plot observations, mean predictions and a shaded band around the mean.

    Args:
        x_seen: 1-D array of input locations (must support ``[::-1]`` and
            ``np.concatenate``).
        y_seen: Observed values at ``x_seen``, drawn as markers.
        means: Predicted mean at each ``x_seen``, drawn as a line.
        variances_: Predictive variance at each ``x_seen``; its square root is
            taken as the standard deviation.
        ci_in_sd: Half-width of the band, in standard deviations.

    Returns:
        None. The figure is displayed with ``plotly.offline.iplot``.
    """
    half_width = ci_in_sd * np.sqrt(variances_)
    upper = means + half_width
    lower = means - half_width

    observed = go.Scatter(x=x_seen, y=y_seen, name="True data", mode="markers")
    mean_line = go.Scatter(x=x_seen, y=means, name="Mean predictions", mode="lines")

    # The band is a single closed polygon: walk the upper edge left to right,
    # then the lower edge right to left, and let plotly fill the enclosed area.
    band = go.Scatter(
        x=np.concatenate([x_seen, x_seen[::-1]]),
        y=np.concatenate([upper, lower[::-1]]),
        fill="toself",
        name=f"{ci_in_sd}-SD confidence interval",
    )

    plotly.offline.iplot(go.Figure([observed, mean_line, band]))


In [14]:
# Plot the fit on the full data set; the helper expects flat 1-D arrays.
plot_mean_preds_with_ci(
    x_seen=x.flatten(),
    y_seen=y.flatten(),
    means=m.flatten(),
    variances_=variances.flatten(),
)

In [15]:
# Let's drop some points: keep only every other observation and refit on the reduced set.

In [16]:
# Keep every second row (indices 0, 2, 4, ...) of the inputs and targets.
x_partial, y_partial = x[::2], y[::2]

In [39]:
# Refit a GP on the subsampled data only, then query it at every original input,
# including the points that were dropped.
gp_partial = GP(lengthscale=1, s=5)
gp_partial.fit(x_partial, y_partial, steps=0)
m_partial, cov_partial = gp_partial.conditional_distribution_at(x)

# Per-point variances are the diagonal of the covariance matrix. Negative entries
# are small rounding errors, so clamp them to zero rather than mirroring them.
variances_partial = np.clip(np.diagonal(cov_partial), 0.0, None)

# Plot against the full data, using the variances of THIS fit. The earlier version
# passed `variances` from the full-data fit by mistake, which drew the wrong band.
plot_mean_preds_with_ci(x.flatten(), y.flatten(), m_partial.flatten(), variances_partial.flatten())