<a href="https://colab.research.google.com/github/davidwhogg/NoDataInterpolation/blob/main/notebooks/how_to_combine_spectra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How to combine spectra without interpolation.

A notebook to support and illustrate ideas in a forthcoming paper.

## Authors:
- **David W Hogg** (NYU) (MPIA) (Flatiron)
- **Andy Casey** (Monash)

## To-Do:
- Something?

In [None]:
# imports and initialize seeds, fundamental constants
import numpy as np
import pylab as plt
import scipy.interpolate as interp
# Fundamental constants first, then pin NumPy's global random state so that
# every random draw made later in the notebook is reproducible.
c = 299792458.  # speed of light, m / s
sqrt2pi = np.sqrt(2. * np.pi)  # Gaussian normalization factor sqrt(2 pi)
np.random.seed(42)

In [None]:
# Notebook-wide figure defaults: size, resolution, automatic layout.
# has to be in its own cell?
plt.rc('figure', **{'figsize': (6.0, 3.6), 'dpi': 150, 'autolayout': True})
# plt.rc('text', usetex=True)

In [None]:
# High-level parameters, grouped by theme.  Every quantity with "x" in its
# name is measured in units of ln wavelength.

# -- spectrograph and noise model
R = 1.35e5  # resolution
sigma_x = 1. / R  # LSF sigma in x units
dx = 1. / (0.85 * R)  # pixel spacing in the poorly sampled data
SNR = 20.  # s/n ratio in the continuum
continuum_ivar = SNR ** 2  # inverse variance of the noise in the continuum

# -- wavelength window covered by the data
x_min, x_max = 8.7000, 8.7025  # minimum and maximum ln wavelength

# -- statistics of the synthetic line list
lines_per_x = 2.0e4  # mean density (Poisson rate) of lines per unit ln wavelength
ew_max_x = 3.0e-5  # maximum equivalent width in x units
ew_power = 5.0  # power parameter in EW maker

In [None]:
# set up the line list for the true spectral model
# NOTE: the two np.random calls below consume the seeded global random stream
# in this order (Poisson count first, then uniform positions); reordering them
# changes every random draw that follows.
x_margin = 1.e6/c # 1.e6 m/s divided by c; hoping no velocities are bigger than 1000 km/s
x_range = x_max - x_min + 2. * x_margin # make lines in a bigger x range than the data range
nlines = np.random.poisson(x_range * lines_per_x) # set the total number of lines
line_xs = (x_min - x_margin) + x_range * np.random.uniform(size=nlines) # line centers, uniform over the padded range

In [None]:
# give those lines equivalent widths from a power-law distribution
# Each width is ew_max_x times a uniform deviate raised to ew_power, so no
# width exceeds ew_max_x; one value is drawn per line in the line list.
line_ews = ew_max_x * np.random.uniform(size=nlines) ** ew_power

In [None]:
# make the synthetic spectrum (spectral expectation), and also add noise

def oned_gaussian(dxs, sigma):
    """
    Evaluate a zero-mean, unit-integral one-dimensional Gaussian.

    # Inputs:
    - `dxs`: offsets from the Gaussian center (scalar or array).
    - `sigma`: Gaussian standard deviation, in the same units as `dxs`.

    # Output:
    - The Gaussian density at each offset, with the same shape as `dxs`.
    """
    exponent = -0.5 * dxs ** 2 / sigma ** 2
    normalization = sqrt2pi * sigma
    return np.exp(exponent) / normalization

def true_spectrum(xs, doppler, lxs=line_xs, ews=line_ews, sigma=sigma_x):
    """
    Noise-free spectrum: exp(-sum over lines of EW times a Gaussian profile).

    # Inputs:
    - `xs`: 1-d array of pixel positions.
    - `doppler`: shift subtracted from `xs` before comparing to line centers.
    - `lxs`: 1-d array of line centers (defaults to the global line list).
    - `ews`: 1-d array of equivalent widths, one per line.
    - `sigma`: Gaussian width of every line profile.

    # Output:
    - 1-d array of fluxes, one per entry of `xs`.
    """
    # (n_pix, n_lines) grid of offsets between each shifted pixel and each line
    offsets = xs[:, None] - doppler - lxs[None, :]
    absorption = np.sum(ews[None, :] * oned_gaussian(offsets, sigma), axis=1)
    return np.exp(-1. * absorption)

def ivar(ys, continuum_ivar):
    """
    Inverse variance of the noise at flux level `ys`.

    # Inputs:
    - `ys`: flux value(s), scalar or array.
    - `continuum_ivar`: inverse variance at unit flux.

    # Output:
    - `continuum_ivar / ys`, with the same shape as `ys`.
    """
    inverse_variance = continuum_ivar / ys
    return inverse_variance

def noisy_true_spectrum(xs, doppler, continuum_ivar):
    """
    Draw one noisy realization of the true spectrum.

    # Inputs:
    - `xs`: 1-d array of pixel positions.
    - `doppler`: shift passed through to `true_spectrum()`.
    - `continuum_ivar`: inverse variance of the noise at unit flux.

    # Outputs:
    - noisy fluxes (true spectrum plus Gaussian noise), same shape as `xs`.
    - the inverse variances used to scale that noise, computed from the
      noise-free spectrum.
    """
    expectation = true_spectrum(xs, doppler)
    inverse_variances = ivar(expectation, continuum_ivar)
    unit_noise = np.random.normal(size=xs.shape)
    return expectation + unit_noise / np.sqrt(inverse_variances), inverse_variances

def doppler_information(xs, doppler, continuum_ivar, dx=0.5*dx):
    """
    Sum over pixels of inverse variance times the squared derivative of the
    true spectrum with respect to the Doppler shift.

    # Inputs:
    - `xs`: 1-d array of pixel positions.
    - `doppler`: shift at which the derivative is evaluated.
    - `continuum_ivar`: inverse variance of the noise at unit flux.
    - `dx`: half-width of the centered finite-difference step.

    # Output:
    - A scalar.

    # Bugs:
    - Horrifying numerical derivative!
    """
    spectrum_plus = true_spectrum(xs, doppler + dx)
    spectrum_minus = true_spectrum(xs, doppler - dx)
    derivative = (spectrum_plus - spectrum_minus) / (2. * dx)
    weights = ivar(true_spectrum(xs, doppler), continuum_ivar)
    return np.sum(weights * derivative ** 2)

In [None]:
# create fake data with true Doppler shifts on a sinusoid of epoch number
# NOTE: each call to noisy_true_spectrum() draws from the global random
# stream, so the epoch order of the loop determines the generated data.
xs = np.arange(x_min, x_max, dx) # pixel grid shared by all epochs
n_pix = len(xs)
n_epochs = 16
true_dopplers = (3.e4 / c) * np.cos(np.arange(n_epochs) / 3) # cosine in epoch index with amplitude 3.e4 / c
ys = np.zeros((n_epochs, n_pix)) # noisy fluxes, one row per epoch
y_ivars = np.zeros((n_epochs, n_pix)) # inverse variances from the noise-free spectrum
y_ivars_empirical = np.zeros((n_epochs, n_pix)) # inverse variances recomputed from the noisy fluxes
for j in range(n_epochs):
    ys[j], y_ivars[j] = noisy_true_spectrum(xs, true_dopplers[j], continuum_ivar)
    y_ivars_empirical[j] = ivar(ys[j], continuum_ivar)

In [None]:
# show some example spectra: one panel per epoch, sharing both axes
fig, axes = plt.subplots(n_epochs, 1, sharex=True, sharey=True, figsize=(6., 1.5 * n_epochs))
for j,ax in enumerate(axes):
    ax.step(xs, ys[j], color="k", where="mid", alpha=0.9)
    # raw string: "\d" is not a valid escape sequence in a normal string literal
    ax.set_title(r"epoch {}; $\delta x = {:+f}$".format(j, true_dopplers[j]))
    ax.set_ylabel(r"flux $y$")
# label only the bottom panel, addressed explicitly rather than through the
# loop variable left over from the loop above
axes[-1].set_xlabel(r"ln wavelength $x=\ln\,\lambda$")
plt.xlim(x_min, x_max)
plt.ylim(0.1, 1.1)
plt.savefig("data.pdf")

In [None]:
# zoom in on one epoch and compare it to the true template.
# The window [x_min, x_max] is split in half across the two panels.
j = 7
fig, axes = plt.subplots(2, 1, sharex=False, sharey=True)
finexs = np.arange(np.min(xs), np.max(xs), 1. / (5. * R)) # fine grid for drawing the template
for ax in axes:
    ax.step(xs, ys[j], color="k", where="mid", alpha=0.9)
    ax.set_ylabel(r"flux $y$")
    # the template is drawn at zero Doppler shift, whereas the data were
    # generated at true_dopplers[j]
    ax.plot(finexs, true_spectrum(finexs, 0.), "r-", alpha=1.0, lw=0.5)
    ax.ticklabel_format(useOffset=False)
# derive the panel limits from the data window instead of repeating its
# numerical values here
axes[0].set_xlim(x_min, 0.5 * (x_min + x_max))
axes[1].set_xlim(0.5 * (x_min + x_max), x_max)
axes[0].set_ylim(-0.1, 1.1)
axes[1].set_xlabel(r"ln wavelength $x=\ln\,\lambda$")
# raw string: "\d" is not a valid escape sequence in a normal string literal
axes[0].set_title(r"epoch {}; $\delta x = {:+f}$".format(j, true_dopplers[j]))

In [None]:
# Output grid for the combined spectrum (spacing 1 / (3 R) across the data
# window) and the number of basis functions P, set equal to the grid length.
xstar = np.arange(x_min, x_max, 1. / (3. * R))
Mstar = P = len(xstar)
print(Mstar, P, xstar.shape)

In [None]:
def design_matrix(xs, L=(x_max - x_min), n_components=None):
    """
    Build the Fourier design matrix evaluated at the positions `xs`.

    Column 0 is constant (all ones).  For k = 1, 2, ... column 2k - 1 holds
    sin(2 pi k xs / L) and column 2k holds cos(2 pi k xs / L).

    # Inputs:
    - `xs`: 1-d array of positions.
    - `L`: period of the lowest-frequency sinusoid; defaults to the width of
      the data window.
    - `n_components`: number of columns to build; when `None` (the default)
      the module-level `P` is used, as before.

    # Output:
    - Float array of shape `(len(xs), n_columns)`; at least one column (the
      constant) is always returned.
    """
    xs = np.asarray(xs)
    n_columns = max(1, P if n_components is None else n_components)
    # Allocate once and fill by slices; growing the matrix one column at a
    # time with np.concatenate recopies the whole matrix for every column.
    X = np.ones((len(xs), n_columns), dtype=float)
    js = np.arange(1, n_columns)
    odd_js, even_js = js[0::2], js[1::2]  # js = 1, 2, 3, ... so odd come first
    # looks different from paper bc 0-indexing
    X[:, 1::2] = np.sin(np.pi * (odd_js + 1)[None, :] * xs[:, None] / L)
    X[:, 2::2] = np.cos(np.pi * even_js[None, :] * xs[:, None] / L)
    return X

In [None]:
def pack_matrices(xs, ys, dopplers=None):
    """
    Stack all epochs into a single least-squares problem.

    Each epoch's pixel grid is shifted by its Doppler shift, pixels whose
    shifted position falls outside the open interval (x_min, x_max) are
    dropped, and the survivors of all epochs are concatenated.

    # Inputs:
    - `xs`: 1-d array of pixel positions, shared by every epoch.
    - `ys`: iterable of 1-d flux arrays, one per epoch.
    - `dopplers`: one shift per epoch; when `None` (the default) the
      module-level `true_dopplers` are used, as before.

    # Outputs:
    - design matrix evaluated at the concatenated shifted positions.
    - 1-d array of the corresponding fluxes.
    """
    if dopplers is None:
        dopplers = true_dopplers
    # Seed each list with an empty float array so the result keeps the same
    # dtype and zero epochs still concatenates cleanly.
    x_chunks = [np.array([])]
    y_chunks = [np.array([])]
    for yy, Dx in zip(ys, dopplers):
        x_rest = xs - Dx
        in_window = np.logical_and(x_rest > x_min, x_rest < x_max)
        y_chunks.append(yy[in_window])
        x_chunks.append(x_rest[in_window])
    # One concatenate at the end instead of reallocating with np.append on
    # every epoch.
    return design_matrix(np.concatenate(x_chunks)), np.concatenate(y_chunks)

In [None]:
# stack every epoch into one problem: X is the design matrix at the shifted
# pixel positions, Y holds the matching fluxes
X, Y = pack_matrices(xs, ys)
print(X.shape, Y.shape)

In [None]:
# fit the basis amplitudes to all epochs at once, then evaluate the fitted
# model on the output grid xstar
Xstar = design_matrix(xstar)
print(Xstar.shape)
# ordinary (unweighted) least squares; only the first return value, the
# solution vector, is used in this cell
thetahat, foo, bar, whatevs = np.linalg.lstsq(X, Y, rcond=None)
print(thetahat.shape)
ystar = Xstar @ thetahat # combined spectrum on the output grid
print(ystar.shape)

In [None]:
# compare the combined spectrum to the true template.
# The window [x_min, x_max] is split in half across the two panels.
fig, axes = plt.subplots(2, 1, sharex=False, sharey=True)
for ax in axes:
    ax.step(xstar, ystar, color="k", where="mid", alpha=0.9)
    ax.set_ylabel(r"flux $y$")
    ax.plot(finexs, true_spectrum(finexs, 0.), "r-", alpha=1.0, lw=0.5)
    ax.ticklabel_format(useOffset=False)
# derive the panel limits from the data window instead of repeating its
# numerical values here
axes[0].set_xlim(x_min, 0.5 * (x_min + x_max))
axes[1].set_xlim(0.5 * (x_min + x_max), x_max)
axes[0].set_ylim(-0.1, 1.1)
axes[1].set_xlabel(r"ln wavelength $x=\ln\,\lambda$")
# the title has no placeholders, so it needs no .format() call
axes[0].set_title("combined spectrum")