# In-class notebook: 2025-02-17

In this notebook, we will look at a few different regression techniques. We demonstrate this on a simulated supernova distance modulus-redshift dataset.  

This notebook is intended to support Chapter 8 of the textbook, and material is taken from the following notebook and scripts (from astroML):
* https://github.com/astroML/astroML-notebooks/blob/main/chapter8/astroml_chapter8_Regression.ipynb
* https://github.com/astroML/astroML_figures/blob/main/book_figures/chapter8/fig_regression_mu_z.py
* https://github.com/astroML/astroML_figures/blob/main/book_figures/chapter8/fig_rbf_ridge_mu_z.py
* https://github.com/astroML/astroML_figures/blob/main/book_figures/chapter8/fig_gp_mu_z.py

In [None]:
%matplotlib inline

import numpy as np
from matplotlib import pyplot as plt

In [None]:
# let's first create some fake data with error bars from a true underlying model

from astropy.cosmology import LambdaCDM
from astroML.datasets import generate_mu_z
# this calls a built-in astroML function, defined here:
# https://github.com/astroML/astroML/blob/main/astroML/datasets/generated.py
# the error bars have the form dmu = dmu_0 + dmu_1 * mu

# Simulate 100 supernovae: redshift, distance modulus and its error bar.
z_sample, mu_sample, dmu = generate_mu_z(
    100, dmu_0=0.1, dmu_1=0.02, random_state=0
)

# Reference cosmology evaluated on a dense redshift grid; it is drawn as the
# dashed gray curve in the figures of this notebook.
# NOTE(review): confirm these parameters match the cosmology that
# generate_mu_z actually draws the sample from.
z = np.linspace(0.01, 2, 1000)
cosmo = LambdaCDM(H0=70, Om0=0.30, Ode0=0.70, Tcmb0=0)
mu_true = cosmo.distmod(z)

# One panel: simulated data with error bars plus the reference curve.
fig, ax = plt.subplots(figsize=(8, 6))

ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
ax.plot(z, mu_true, '--', c='gray')

ax.set(xlim=(0.01, 1.8), ylim=(36.01, 48))

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

## We first try a linear regression using the matrix formalism

In [None]:
# Ingredients of the weighted least-squares problem Y = M Theta.
# The sample size is read from the data instead of being hard-coded to 100,
# so this cell keeps working if the number of simulated points changes.
n_points = len(z_sample)

# Column vector of the observed distance moduli.
Y = mu_sample.reshape(n_points, 1)

# Design matrix: a column of ones (intercept) and a column of redshifts (slope).
M = np.column_stack([np.ones(n_points), z_sample])

# Diagonal data covariance built from the squared error bars.
# Multiplying by np.ones keeps this valid should dmu ever be a scalar.
C = np.diag(np.ones(n_points) * dmu ** 2)
# C = np.diag(np.ones(n_points))
# uncomment to get homoscedastic error bars

In [None]:
# Weighted least-squares solution Theta = (M^T C^-1 M)^-1 (M^T C^-1 Y).
# C^-1 and M^T C^-1 appear in both factors, so they are computed once
# instead of inverting C twice.
C_inv = np.linalg.inv(C)
Mt_C_inv = M.T @ C_inv

# (M^TC^-1M)^-1
Theta1 = np.linalg.inv(Mt_C_inv @ M)

# M^TC^-1Y
Theta2 = Mt_C_inv @ Y

# Theta
Theta = Theta1 @ Theta2

In [None]:
# First the least-squares best-fit parameters (Theta), then the matrix
# (M^T C^-1 M)^-1 (Theta1), i.e. the covariance between those parameters.
print(Theta, Theta1, sep='\n')

In [None]:
# Data, matrix-formalism straight line (blue dashed) and reference curve (gray).
fig, ax = plt.subplots(figsize=(8, 6))

ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
# Theta[0] is the intercept and Theta[1] the slope of the fitted line.
ax.plot(z, Theta[0] + Theta[1] * z, linestyle='--', color='blue')
ax.plot(z, mu_true, '--', c='gray')

ax.set(xlim=(0.01, 1.8), ylim=(36.01, 48))

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

## We next move on to the built-in functions in sklearn, still linear regression 

In [None]:
from sklearn.linear_model import LinearRegression as LinearRegression_sk 
# read about linear models in sklearn here: 
# https://scikit-learn.org/stable/modules/linear_model.html

# sklearn expects a 2-D feature matrix, hence the reshape to a single column.
# Calling fit() is the "training" step; it returns the fitted estimator itself.
linear_sk = LinearRegression_sk().fit(z_sample.reshape(-1, 1), mu_sample)

# Evaluate the fitted line on the dense redshift grid.
mu_fit_sk = linear_sk.predict(z.reshape(-1, 1))

In [None]:
# Plot the results: sklearn line (solid black), reference curve (dashed gray),
# data points, and the matrix-formalism line (dashed blue).
fig, ax = plt.subplots(figsize=(8, 6))

ax.plot(z, mu_fit_sk, '-k')
ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

# The sklearn fit above did not use the error bars, which is why it differs
# somewhat from the matrix-formalism line drawn here.
ax.plot(z, Theta[0] + Theta[1] * z, linestyle='--', color='b')

ax.set(xlim=(0.01, 1.8), ylim=(36.01, 48))

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

In [None]:
# astroML is wrapping sklearn to give some additional functions
# https://github.com/astroML/astroML/blob/main/astroML/linear_model/linear_regression.py
# https://github.com/astroML/astroML/blob/main/astroML/linear_model/linear_regression_errors.py

from astroML.linear_model import LinearRegression

# Straight-line fit with astroML's LinearRegression, which accepts error bars.
linear = LinearRegression()

# this is doing the "training"
# now including the errors
linear.fit(z_sample[:, None], mu_sample, dmu)

mu_fit = linear.predict(z[:, None])

# Plot the results
fig = plt.figure(figsize=(8, 6))

ax = fig.add_subplot(111)

# black: sklearn fit (no errors); red: astroML fit (with errors)
ax.plot(z, mu_fit_sk, '-k')
# The format string carries only the line style here: '-k' together with
# color='red' defines the colour twice and makes matplotlib emit a warning.
ax.plot(z, mu_fit, '-', color='red')

ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

# blue dashed: matrix-formalism solution, for comparison with the red line
ax.plot(z, Theta[0] + Theta[1] * z, ls='--', color='b')


ax.set_xlim(0.01, 1.8)
ax.set_ylim(36.01, 48)

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

## Next try basis function regression

In [None]:
# polynomials

from astroML.linear_model import PolynomialRegression

# 2nd degree polynomial regression
polynomial = PolynomialRegression(degree=2)
# this is doing the "training" (error bars are passed as the third argument)
polynomial.fit(z_sample[:, None], mu_sample, dmu)

mu_fit_poly = polynomial.predict(z[:, None])

# Plot the results
fig = plt.figure(figsize=(8, 6))

ax = fig.add_subplot(111)

# Only the line style goes in the format string: '-k' together with
# color='red' defines the colour twice and makes matplotlib emit a warning.
ax.plot(z, mu_fit_poly, '-', color='red')

ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

ax.set_xlim(0.01, 1.8)
ax.set_ylim(36.01, 48)

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

In [None]:
# Gaussian basis

from astroML.linear_model import BasisFunctionRegression

# Define our number of Gaussians
nGaussians = 10
# Centres evenly spaced over 0 <= z <= 2, stored as a column.
basis_mu = np.linspace(0, 2, nGaussians)[:, None]
# Every Gaussian gets the same width: three times the centre spacing.
basis_sigma = 3 * (basis_mu[1] - basis_mu[0])

gauss_basis = BasisFunctionRegression('gaussian', mu=basis_mu, sigma=basis_sigma)
# this is doing the "training" (error bars are passed as the third argument)
gauss_basis.fit(z_sample[:, None], mu_sample, dmu)

mu_fit_gauss = gauss_basis.predict(z[:, None])

# Plot the results
fig = plt.figure(figsize=(8, 6))

ax = fig.add_subplot(111)

# Only the line style goes in the format string: '-k' together with
# color='red' defines the colour twice and makes matplotlib emit a warning.
ax.plot(z, mu_fit_gauss, '-', color='red')

ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

ax.set_xlim(0.01, 1.8)
ax.set_ylim(36.01, 48)

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')


## Now try kernel regression

In [None]:
# Gaussian kernel (Nadaraya-Watson)

from astroML.linear_model import NadarayaWatson

# Define the kernel: Gaussian, with h=0.1

NWkernal = NadarayaWatson('gaussian', h=0.1)
# this is doing the "training" (error bars are passed as the third argument)
NWkernal.fit(z_sample[:, None], mu_sample, dmu)

mu_fit_NWkernal = NWkernal.predict(z[:, None])

# Plot the results
fig = plt.figure(figsize=(8, 6))

ax = fig.add_subplot(111)

# Only the line style goes in the format string: '-k' together with
# color='red' defines the colour twice and makes matplotlib emit a warning.
ax.plot(z, mu_fit_NWkernal, '-', color='red')

ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

ax.set_xlim(0.01, 1.8)
ax.set_ylim(36.01, 48)

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')


## Now let's look at some regularized regression

In [None]:
# Three fits are compared: no regularization, Ridge regression (L2) and
# Lasso regression (L1).  The four lists are parallel: entry i of each one
# describes fit number i.
labels = ['Linear Regression', 'Ridge Regression', 'Lasso Regression']
regularization = ['none', 'l2', 'l1']
kwargs = [{}, {'alpha': 0.005}, {'alpha': 0.001}]
color = ['teal', 'orange', 'navy']

# Canvas with the reference curve and the data; the fits are drawn on top.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(z, mu_true, '--', c='gray')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)

# Manually convert data to a gaussian basis
# note: the fits further down in this cell do pass the error bars (dmu) to fit()
def gaussian_basis(x, mu, sigma):
    """Evaluate the unnormalised Gaussian exp(-0.5 * ((x - mu) / sigma)**2).

    The three arguments are combined with ordinary NumPy broadcasting, so
    a column of points and a row of centres give a 2-D array of values.
    """
    scaled_offset = (x - mu) / sigma
    return np.exp(-0.5 * scaled_offset ** 2)

# 100 Gaussian basis functions of width 0.2, centred evenly on 0 <= z <= 1.8.
centers = np.linspace(0, 1.8, 100)
widths = 0.2
# Row k of X holds the value of every basis function at z_sample[k].
X = gaussian_basis(z_sample[:, np.newaxis], centers, widths)

# The basis evaluated on the plotting grid is the same for all three fits,
# so it is computed once, outside the loop.
X_grid = gaussian_basis(z[:, None], centers, widths)

# Walk the parallel configuration lists together instead of indexing them.
for reg, reg_kwds, line_color, label in zip(regularization, kwargs, color, labels):
    clf = LinearRegression(regularization=reg,
                           fit_intercept=True, kwds=reg_kwds)
    # this is doing the "training"
    clf.fit(X, mu_sample, dmu)
    fit = clf.predict(X_grid)

    # Only the line style goes in the format string: '-k' together with
    # color=... defines the colour twice and makes matplotlib emit a warning.
    ax.plot(z, fit, '-', color=line_color, label=label)


ax.set_xlim(0.01, 1.8)
ax.set_ylim(36.01, 48)
ax.legend()

ax.set_ylabel(r'$\mu$')
ax.set_xlabel(r'$z$')

# this is what converting to a Gaussian basis means
# each z point is now mapped to a Gaussian
# this makes the problem highly correlated, thus the regularization is needed
plt.figure()
print(z_sample[0], z_sample[1])
plt.plot(X[0])
plt.plot(X[1])

## Now Gaussian process regression

In [None]:
# Read about Gaussian process in sklearn here: 
# https://scikit-learn.org/stable/modules/gaussian_process.html

from sklearn.gaussian_process.kernels import ConstantKernel, RBF
from sklearn.gaussian_process import GaussianProcessRegressor

# Kernel: a constant amplitude times an RBF; the tuples are the bounds
# within which each hyperparameter may vary.
kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))

# alpha is set to the squared error bar of each data point.
gp = GaussianProcessRegressor(kernel=kernel, alpha=dmu ** 2)

# this is doing the "training"
gp.fit(z_sample[:, None], mu_sample)

# mesh the input space for evaluating the prediction and its standard deviation
z_fit = np.linspace(0, 2, 1000)
y_pred, sigma = gp.predict(z_fit[:, None], return_std=True)
print(sigma.shape)

# Plot the gaussian process
#  gaussian process allows computation of the error at each point
#  so we will show this as a shaded region
fig = plt.figure(figsize=(8, 6))

ax = fig.add_subplot(111)

ax.plot(z, mu_true, '--k')
ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', markersize=6)
ax.plot(z_fit, y_pred, '-k')
# Shaded band: prediction +/- 2 sigma.
ax.fill_between(z_fit, y_pred - 2.0 * sigma, y_pred + 2.0 * sigma,
                alpha=0.2, color='b', label='95% confidence interval')
# Without a legend the label given to the shaded band is never displayed.
ax.legend()

ax.set_xlabel('$z$')
ax.set_ylabel(r'$\mu$')

ax.set_xlim(0, 2)
ax.set_ylim(36, 48)

## Now nonlinear regression

i.e., "fitting"

In [None]:
from astroML.plotting.mcmc import convert_to_stdev
from astroML.utils.decorators import pickle_results

# we want to use tighter errors here, just for illustration
# (this overwrites z_sample, mu_sample and dmu from the start of the notebook)
# z_sample, mu_sample, dmu = generate_mu_z(100, random_state=0, dmu_0=0.005, dmu_1=0.005)
tight_errors = dict(z0=0.3, dmu_0=0.05, dmu_1=0.004)
z_sample, mu_sample, dmu = generate_mu_z(100, random_state=0, **tight_errors)


#------------------------------------------------------------
# define a log likelihood in terms of the parameters
#  beta = [omegaM, omegaL]
def compute_logL(beta):
    """Return the log-likelihood of the sample for beta = [omegaM, omegaL].

    A LambdaCDM model with H0=71 and the given density parameters predicts
    the distance modulus at every z_sample; the result is minus half the
    sum of squared residuals, each scaled by its error bar dmu.
    Reads the notebook-level z_sample, mu_sample and dmu.
    """
    omega_m, omega_l = beta[0], beta[1]
    model = LambdaCDM(H0=71, Om0=omega_m, Ode0=omega_l, Tcmb0=0)
    # .value drops the astropy unit so the arithmetic below is on plain arrays
    mu_pred = model.distmod(z_sample).value
    scaled_residual = (mu_sample - mu_pred) / dmu
    return -np.sum(0.5 * scaled_residual ** 2)

# Define a function to compute the log-likelihood on a grid.
# Re-enable the decorator below to also cache the result to file.
# @pickle_results('mu_z_nonlinear.pkl')
def compute_mu_z_nonlinear(Nbins=200):
    """Evaluate compute_logL on an Nbins x Nbins grid of (omegaM, omegaL).

    Parameters
    ----------
    Nbins : int
        Number of grid points along each axis.

    Returns
    -------
    omegaM : ndarray, shape (Nbins,)
        Grid values from 0.05 to 0.75.
    omegaL : ndarray, shape (Nbins,)
        Grid values from 0.4 to 1.1.
    logL : ndarray, shape (Nbins, Nbins)
        logL[i, j] is the log-likelihood at (omegaM[i], omegaL[j]).
    """
    omegaM = np.linspace(0.05, 0.75, Nbins)
    omegaL = np.linspace(0.4, 1.1, Nbins)

    logL = np.empty((Nbins, Nbins))

    # enumerate yields index and value together, avoiding range(len(...))
    for i, om in enumerate(omegaM):
        for j, ol in enumerate(omegaL):
            logL[i, j] = compute_logL([om, ol])

    return omegaM, omegaL, logL


# Evaluate the log-likelihood grid, then shift it so that its maximum is zero.
omegaM, omegaL, res = compute_mu_z_nonlinear()
res = res - res.max()


In [None]:
fig = plt.figure(figsize=(10, 5))

# getting best-fit: grid indices where the log-likelihood reaches its maximum
# (the first such point is used if several tie)
whr = np.nonzero(res == res.max())
omegaM_best, omegaL_best = omegaM[whr[0][0]], omegaL[whr[1][0]]
cosmo = LambdaCDM(H0=71, Om0=omegaM_best, Ode0=omegaL_best, Tcmb0=0)

# best-fit distance modulus on a redshift grid
z_fit = np.linspace(0.04, 2, 100)
mu_fit = cosmo.distmod(z_fit).value

# left plot: the data and best-fit
ax_left = fig.add_subplot(121)
ax_left.plot(z_fit, mu_fit, '-k')
ax_left.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray')

ax_left.set(xlim=(0, 1.8), ylim=(36, 46))
ax_left.set_xlabel('$z$')
ax_left.set_ylabel(r'$\mu$')

# sample size, written in the top-left corner in axes coordinates
ax_left.text(0.04, 0.96, "%i observations" % len(z_sample),
             ha='left', va='top', transform=ax_left.transAxes)

# right plot: the likelihood
ax = fig.add_subplot(122)

# res is indexed [omegaM, omegaL]; it is transposed so that omegaM runs
# along the x axis of the contour plot
ax.contour(omegaM, omegaL, convert_to_stdev(res.T),
           levels=(0.683, 0.955, 0.997),
           colors='k')

# reference lines: omegaM + omegaL = 1 (dashed), omegaL = 0.73 and
# omegaM = 0.27 (dotted)
ax.plot([0, 1], [1, 0], '--k')
ax.plot([0, 1], [0.73, 0.73], ':k')
ax.plot([0.27, 0.27], [0, 2], ':k')

ax.set(xlim=(0.05, 0.75), ylim=(0.4, 1.1))

ax.set_xlabel(r'$\Omega_M$')
ax.set_ylabel(r'$\Omega_\Lambda$')