In [1]:
import pandas as pd
from vega_datasets import data
from io import BytesIO
from scipy.linalg import solve, norm
import bqplot.pyplot as bqplt
import bqplot as bq
import ipywidgets as widgets
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Read the CO2 concentration CSV from the raw bytes provided by vega_datasets.
co2 = pd.read_csv(BytesIO(data.co2_concentration.raw()))

# Use lower-case column names, then parse the "date" column into datetimes.
co2.columns = co2.columns.str.lower()
co2 = co2.assign(date=pd.to_datetime(co2["date"]))

In [3]:
def standardize(col):
    """Return ``col`` centred on its mean and divided by its standard deviation.

    The mean and standard deviation are whatever ``col.mean()`` and
    ``col.std()`` report for the object that is passed in.
    """
    centered = col - col.mean()
    return centered / col.std()


# Feature table indexed by date: the calendar year and month as floats, the
# squared standardized year, and a cosine/sine encoding of the month with a
# 12-month period.
dataset = (
    co2.assign(
        year=co2["date"].dt.year.astype(float),
        month=co2["date"].dt.month.astype(float),
    )
    .assign(
        year_2=lambda f: standardize(f["year"]) ** 2,
        cos_month=lambda f: np.cos(f["month"] * 2 * np.pi / 12),
        sin_month=lambda f: np.sin(f["month"] * 2 * np.pi / 12),
    )
    .set_index("date")
)


def apply(params, x):
    """Evaluate a linear model on ``x``.

    ``params[0]`` is used as the intercept and ``params[1:]`` as the weights
    that are matrix-multiplied with ``x``.
    """
    intercept, coefficients = params[0], params[1:]
    return x @ coefficients + intercept


def ols(x, y, reg=0):
    """Fit a linear model with an intercept by (optionally ridge-regularised) least squares.

    Solves the normal equations ``(X'X + P) w = X'y`` where ``X`` is ``x`` with
    a leading column of ones and ``P`` is ``reg`` on the diagonal entries that
    belong to feature weights.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Feature matrix without an intercept column. ``n_features`` may be 0,
        in which case only the intercept is fitted.
    y : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values.
    reg : float, default 0
        L2 penalty applied to the feature weights. The intercept is left
        unpenalised so that regularisation shrinks the slopes without pulling
        the overall level of the fit toward zero.

    Returns
    -------
    numpy.ndarray
        Parameters in the layout ``apply`` expects: index 0 is the intercept,
        the remaining entries are the feature weights in column order.

    Raises
    ------
    numpy.linalg.LinAlgError
        Propagated from ``scipy.linalg.solve`` when the system is singular.
    """
    x = np.asarray(x)
    # Build the ones column from the row count rather than from x[:, [0]],
    # which would fail when x has no feature columns at all.
    design = np.hstack((np.ones((x.shape[0], 1)), x))

    penalty = reg * np.eye(design.shape[1])
    penalty[0, 0] = 0.0  # do not regularise the intercept

    return solve(design.T @ design + penalty, design.T @ y)

In [4]:
# Axis captions handed to the plot call below.
axes_options = {"x": {"label": "date"}, "y": {"label": "co2"}}

# Figure that the following pyplot-style calls add their marks to.
fig = bqplt.figure()

# Two line series over the date index, created empty; on_change assigns their
# y-values in the order given by `labels` (prediction first, measurement second).
targets = bqplt.plot(
    dataset.index,
    [[], []],
    axes_options=axes_options,
    labels=["Prediction", "Measurement"],
    colors=["blue", "orange"],
    display_legend=True,
)

# Vertical marker with a placeholder position; on_change moves it to the pivot date.
vline = bqplt.vline([pd.Timestamp("2010-01-01")])


def on_change(_):
    """Refit the model from the current widget values and refresh the display.

    Reads the pivot year, the regularisation strength and the selected feature
    names from the widgets, fits ``ols`` on the rows of ``dataset`` up to
    1 January of the pivot year (features standardized with the training mean
    and standard deviation), then updates the weights read-out, the two plotted
    series and the vertical pivot marker. The event argument is ignored.
    """
    cutoff = pd.Timestamp(f"{pivot_slider.value}-01-01")
    strength = reg_slider.value
    selected = list(features_choice.value)

    # Training window: every row whose date label is within the slice up to the cutoff.
    train = dataset.loc[:cutoff]
    train_x = train[selected]
    center = train_x.mean()
    scale = train_x.std()

    def normalise(frame):
        # Always use the training statistics, also for rows after the cutoff.
        return frame.sub(center).div(scale)

    params = ols(normalise(train_x).to_numpy(), train["co2"].to_numpy(), reg=strength)

    per_feature = [f"<b>{name}</b>: {w:.2f}" for name, w in zip(selected, params[1:])]
    weights_label.value = f"<b>Bias</b>: {params[0]:.2f} || " + " | ".join(per_feature)

    # Prediction over the whole date range first, measurements second.
    targets.y = [apply(params, normalise(dataset[selected])), dataset["co2"]]
    vline.x = [cutoff, cutoff]


# Candidate regressors (columns of `dataset`); all of them are selected initially.
features = ["year", "year_2", "month", "cos_month", "sin_month"]
features_choice = widgets.SelectMultiple(
    options=features, description="Features", value=features
)
features_choice.observe(on_change, "value")

# Year whose 1 January ends the training window used by on_change.
pivot_slider = widgets.IntSlider(min=1960, max=2015, description="Pivot year")
pivot_slider.observe(on_change, "value")

# Read-out that on_change fills with the fitted bias and feature weights.
weights_label = widgets.HTML(value="", description="Weights")

# Regularisation strength that on_change forwards to ols as `reg`.
reg_slider = widgets.FloatLogSlider(min=-3, max=1, value=1.0, description="Regularize")
reg_slider.observe(on_change, "value")

# Assigning a new value notifies the observer, which draws the initial fit.
pivot_slider.value = 1980

# reg_slider has to be part of the layout; otherwise the regularisation
# strength is wired up but can never be changed from the UI.
widgets.VBox(
    [
        widgets.VBox([pivot_slider, reg_slider, features_choice, weights_label]),
        fig,
    ]
)

VBox(children=(VBox(children=(IntSlider(value=1980, description='Pivot year', max=2015, min=1960), SelectMulti…