# Making a kernel to reproduce a power spectrum

_Alex Malz (GCCL@RUB)_, add your name here

In [None]:
import george
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as la
import pomegranate
from pomegranate.distributions import MultivariateGaussianDistribution as MGD

## Examine the precomputed 2PCF

Download the 2PCF at several redshifts [here](https://drive.google.com/drive/folders/1eGlAO_wl9h0xiXiTMKV_m7h9YCRhDHP_?usp=sharing).

Note that the data is $\Delta^{2}(k)$, not the more familiar (to me) $\mathcal{P}(k)$.  (A reminder of the relationship between them can be found [here](http://universe-review.ca/R05-04-powerspectrum.htm), particularly in [this figure](http://universe-review.ca/I02-20-correlate1b.png).)

In [None]:
# Read the whitespace-delimited table and transpose it so that each column of
# the file becomes one row of `pk`. The plotting cell below treats pk[0] as k
# and pk[1] as Delta^2(k) -- TODO confirm against the file's column layout.
pk = np.genfromtxt('NptFns/0.042ngpps_new.dat_LOS1').T

In [None]:
# Sanity check: show the dimensions of the transposed array.
print(pk.shape)

In [None]:
# Length of the last axis of pk; used below as the side length of the square
# kernel matrix.
nk = pk.shape[-1]

Convert the x-axis from wavenumber $k$ to length scale $r = 2\pi/k$. (I am not sure whether the units of the input were what I was expecting to begin with...)

In [None]:
# Plot the second row of pk against the length scale r = 2*pi/k on log-log
# axes. NOTE(review): assumes pk[0] holds k and pk[1] holds Delta^2(k) --
# confirm against the data file's column layout.
plt.loglog(2 * np.pi / pk[0], pk[1])
# The abscissa is 2*pi/k, i.e. a length (hence Mpc/h), so label it as r
# rather than k.
plt.xlabel(r'$r = 2\pi/k$ [Mpc/h]')
plt.ylabel(r'$\Delta^2(k)$')

## Make a kernel from the 2PCF

Note: I think this may be backwards!!!

In [None]:
# Start from an nk-by-nk matrix of ones. The fill loop further down only
# writes the off-diagonals (offsets 1..nk-1), so the main diagonal keeps the
# value 1.
kernel = np.ones((nk, nk))

In [None]:
# Visualize the kernel before it is filled (uniformly 1 at this point).
plt.imshow(kernel)

In [None]:
def off_diag_indices(a, i):
    """Return the (rows, cols) index arrays of the i-th off-diagonal of `a`.

    `a` must be a square 2-D array (the main-diagonal indices come from
    `np.diag_indices_from`). A positive `i` selects the diagonal `i` steps
    above the main diagonal, a negative `i` the one `|i|` steps below it, and
    `i == 0` the main diagonal itself. The returned pair can be used directly
    for fancy indexing, e.g. `a[off_diag_indices(a, 1)]`.
    """
    diag_rows, diag_cols = np.diag_indices_from(a)
    if i == 0:
        return diag_rows, diag_cols
    if i > 0:
        # Above the diagonal: drop the last i rows and the first i columns.
        return diag_rows[:-i], diag_cols[i:]
    # Below the diagonal: drop the first |i| rows and the last |i| columns.
    return diag_rows[-i:], diag_cols[:i]

In [None]:
# Fill the kernel symmetrically: both diagonals at offset i (below and above
# the main diagonal) receive the same value pk[1][-i], i.e. pk[1] is read
# from its end for the smallest offsets. pk[1][0] is never used and the main
# diagonal is left untouched.
for i in range(1, nk):
    value = pk[1][-i]
    for ind in (off_diag_indices(kernel, -i), off_diag_indices(kernel, i)):
        kernel[ind] = value

In [None]:
# Visualize the kernel after the off-diagonals have been filled.
plt.imshow(kernel)

In [None]:
# borrowed from https://gist.github.com/fasiha/fdb5cec2054e6f1c6ae35476045a0bbd
def nearestPD(A):
    """Return a positive-definite matrix close to the square input `A`.

    A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which
    credits [2]. The input is not modified; a new array is returned.

    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd

    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6
    """
    # Symmetric part of the input.
    sym = (A + A.T) / 2

    # Build Vh.T * diag(s) * Vh from the SVD of the symmetric part and
    # average it with the symmetric part itself.
    _, sing_vals, Vh = la.svd(sym)
    polar = np.dot(Vh.T, np.dot(np.diag(sing_vals), Vh))
    candidate = (sym + polar) / 2

    # Symmetrize once more to remove any round-off asymmetry.
    candidate = (candidate + candidate.T) / 2

    if isPD(candidate):
        return candidate

    # This differs from [1]. MATLAB's `chol` apparently accepts matrices with
    # an exactly-zero eigenvalue, whereas Numpy's Cholesky does not. Where [1]
    # uses `eps(mineig)` (`eps` being MATLAB's `np.spacing`), the floating
    # point spacing at the norm of the input is used here instead. CAVEAT:
    # this value is much larger than [1]'s `eps(mineig)`: `mineig` is usually
    # on the order of 1e-16 and `eps(1e-16)` on the order of 1e-34, whereas
    # the spacing used here is, for small Gaussian random matrices, on the
    # order of 1e-16. In practice both variants converge, as the unit test
    # below suggests.
    eps = np.spacing(la.norm(A))
    identity = np.eye(A.shape[0])

    # The candidate is known not to be PD at this point, so shift first and
    # test afterwards. Each round pushes the diagonal up by the current
    # minimum eigenvalue scaled by step**2, plus the small `eps` offset.
    step = 1
    while True:
        min_eig = np.min(np.real(la.eigvals(candidate)))
        candidate += identity * (-min_eig * step**2 + eps)
        if isPD(candidate):
            return candidate
        step += 1

def isPD(B):
    """Report whether `B` is positive-definite.

    The check attempts a Cholesky factorization: it returns True when
    `la.cholesky` succeeds and False when it raises `LinAlgError`.
    """
    try:
        la.cholesky(B)
    except la.LinAlgError:
        return False
    return True

# Self-test for nearestPD/isPD; runs only when this code is executed as the
# main program rather than imported.
if __name__ == '__main__':
    import numpy as np
    # 10 repetitions over every size j = 2..99. The matrices are drawn from
    # the unseeded global NumPy RNG, so the exact inputs differ between runs.
    for i in range(10):
        for j in range(2, 100):
            A = np.random.randn(j, j)
            B = nearestPD(A)
            # The result of nearestPD must pass the Cholesky-based PD check.
            assert(isPD(B))
    print('unit test passed!')


In [None]:
# Replace the hand-built kernel with a nearby positive-definite matrix
# (nearestPD verifies the result via Cholesky). `kernel` itself is not
# modified; the result is a new array.
newkern = nearestPD(kernel)

In [None]:
# Visualize the positive-definite kernel for comparison with the raw one.
plt.imshow(newkern)

In [None]:
# Write the kernel to disk as text. NOTE(review): no `delimiter` is passed,
# so np.savetxt uses its default (a single space); despite the .csv
# extension the file is space-separated, not comma-separated.
np.savetxt('2PCF_kernel.csv', newkern)

## Sample from this kernel

In [None]:
# Build a multivariate Gaussian from a length-nk vector of ones and the
# positive-definite kernel. NOTE(review): presumably the arguments are
# (means, covariance) -- confirm against the pomegranate API.
test = MGD(np.ones(nk), newkern)

Still working on this...