# Making a kernel to reproduce a power spectrum

_Alex Malz (GCCL@RUB)_, add your name here

In [None]:
from astroML.decorators import pickle_results
from astroML.correlation import two_point
import george
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as la
# import pomegranate
# from pomegranate.distributions import MultivariateGaussianDistribution as MGD

## Examine the precomputed 2PCF

Download the 2PCF at several redshifts [here](https://drive.google.com/drive/folders/1eGlAO_wl9h0xiXiTMKV_m7h9YCRhDHP_?usp=sharing).

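Note that the data is the dimensionless power spectrum $\Delta^{2}(k)$, not the more familiar (to me) $\mathcal{P}(k)$; in the standard three-dimensional convention the two are related by $\Delta^{2}(k) = k^{3}\mathcal{P}(k) / (2\pi^{2})$.  (A reminder of the relationship between them can be found [here](http://universe-review.ca/R05-04-powerspectrum.htm), particularly in [this figure](http://universe-review.ca/I02-20-correlate1b.png).)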

In [None]:
pk = np.genfromtxt('NptFns/0.042ngpps_new.dat_LOS1').T

In [None]:
print(pk.shape)

In [None]:
nk = pk.shape[-1]

Convert the horizontal axis from wavenumber $k$ to separation $r = 2\pi/k$. (I am not sure whether the units of the input were what I was expecting in the first place...)

In [None]:
# The quantity on the horizontal axis is 2*pi/pk[0], i.e. a separation r
# rather than the wavenumber itself, so the axis is labelled accordingly.
# NOTE(review): assumes pk[0] holds k in h/Mpc, which makes r come out in
# Mpc/h -- confirm against the data file.
separation = 2 * np.pi / pk[0]
plt.loglog(separation, pk[1])  # log scaling on both axes
plt.xlabel(r'$r = 2\pi/k$ [Mpc/h]')
plt.ylabel(r'$\Delta^2(k)$')

## Make a kernel from the 2PCF

Note: I think this may be backwards!!!

In [None]:
kernel = np.ones((nk, nk))

In [None]:
# An empty string is not a valid colormap name; use the same colormap as the
# other kernel images in this notebook so the figures are comparable.
plt.imshow(kernel, cmap='Spectral_r')

In [None]:
def off_diag_indices(a, i):
    """Return index arrays addressing the ``i``-th off-diagonal of ``a``.

    Parameters
    ----------
    a : ndarray
        Square 2-D array; its main-diagonal indices are obtained with
        ``np.diag_indices_from``.
    i : int
        Diagonal offset.  A positive value selects the diagonal ``i`` steps
        above the main diagonal, a negative value the one ``-i`` steps
        below it, and ``0`` the main diagonal itself.

    Returns
    -------
    tuple of ndarray
        ``(rows, cols)`` index arrays that can be used to index ``a``.
    """
    main_rows, main_cols = np.diag_indices_from(a)
    if i > 0:
        # Drop the last i rows and the first i columns: column = row + i.
        return main_rows[:-i], main_cols[i:]
    if i < 0:
        # Drop the first -i rows and the last -i columns: row = column - i.
        return main_rows[-i:], main_cols[:i]
    return main_rows, main_cols

In [None]:
# Fill each pair of off-diagonals (below and above the main diagonal) with a
# single value read from the second row of ``pk``, counted from its end:
# offset 1 receives pk[1][-1], offset 2 receives pk[1][-2], and so on.
# The main diagonal is left untouched.
for offset in range(1, nk):
    band_value = pk[1][-offset]
    for signed_offset in (-offset, offset):
        kernel[off_diag_indices(kernel, signed_offset)] = band_value

In [None]:
plt.imshow(kernel, cmap='Spectral_r')
plt.savefig('2PCFkernel.png', dpi=250)

In [None]:
# borrowed from https://gist.github.com/fasiha/fdb5cec2054e6f1c6ae35476045a0bbd
def nearestPD(A):
    """Return the positive-definite matrix nearest to the input ``A``.

    A Python/NumPy port of John D'Errico's `nearestSPD` MATLAB code [1],
    which credits [2].

    [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd

    [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite
    matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6

    Parameters
    ----------
    A : ndarray
        Square 2-D matrix.  It is not modified.

    Returns
    -------
    ndarray
        A symmetric matrix for which ``isPD`` returns True.
    """
    # Symmetric part of the input.
    sym = (A + A.T) / 2

    # Symmetric factor rebuilt from the singular values and right singular
    # vectors of the symmetric part.
    _, sing_vals, vh = la.svd(sym)
    polar = np.dot(vh.T, np.dot(np.diag(sing_vals), vh))

    # Average the two and symmetrise once more to remove round-off asymmetry.
    candidate = (sym + polar) / 2
    candidate = (candidate + candidate.T) / 2
    if isPD(candidate):
        return candidate

    # This differs from [1].  MATLAB's `chol` appears to accept matrices with
    # an exactly zero eigenvalue, whereas NumPy's Cholesky does not, so where
    # [1] uses `eps(mineig)` (`eps` being MATLAB's `np.spacing`) the spacing
    # of the norm of A is used instead.  CAVEAT: this value is much larger
    # than `eps(mineig)` in [1]: `mineig` is usually of order 1e-16 and
    # `eps(1e-16)` of order 1e-34, whereas this spacing is, for Gaussian
    # random matrices of small dimension, of the order of 1e-16.  In
    # practice both ways converge, as the unit test below suggests.
    jitter = np.spacing(la.norm(A))
    identity = np.eye(A.shape[0])
    attempt = 1
    while True:
        # Shift the spectrum upwards by a growing multiple of the most
        # negative eigenvalue until the Cholesky test succeeds.
        smallest_eig = np.min(np.real(la.eigvals(candidate)))
        candidate += identity * (-smallest_eig * attempt**2 + jitter)
        attempt += 1
        if isPD(candidate):
            return candidate

def isPD(B):
    """Report whether ``B`` is positive-definite, judged by Cholesky.

    Returns True when ``la.cholesky(B)`` succeeds and False when it raises
    ``la.LinAlgError``.
    """
    try:
        la.cholesky(B)
    except la.LinAlgError:
        return False
    return True

if __name__ == '__main__':
    import numpy as np

    # Self-check: for ten rounds, project random Gaussian square matrices of
    # every size from 2x2 up to 99x99 and require each result to pass the
    # Cholesky-based positive-definiteness test.
    n_rounds = 10
    for _round in range(n_rounds):
        for size in range(2, 100):
            projected = nearestPD(np.random.randn(size, size))
            assert isPD(projected)
    print('unit test passed!')


In [None]:
newkern = nearestPD(kernel)

In [None]:
np.savetxt('2PCF_kernel.csv', newkern)

In [None]:
newkern = np.genfromtxt('2PCF_kernel.csv')

In [None]:
plt.imshow(newkern, cmap='Spectral_r')
plt.savefig('posdef2PCFkernel.png', dpi=250)

## Compare samples from this kernel with the original 2PCF

In [None]:
randfield = np.genfromtxt('RF_fromKernel.dat')

In [None]:
randfield.shape

In [None]:
plt.imshow(randfield)

In [None]:
ftrand = np.fft.ifft2(randfield)

In [None]:
ftrand.shape

In [None]:
plt.imshow(np.real(ftrand))

In [None]:
samps = np.genfromtxt('RF_fromKernel.csv')

In [None]:
samps.shape

In [None]:
plt.imshow(samps)

# scratch after here

In [None]:
nc = samps.shape[0]     # number of cells along the first axis of the box
boxlen = 50.0           # length of the box
Lambda = boxlen / 4.0   # arbitrary wavelength of the plane wave
dx = boxlen / nc        # size of one cell

# Create the plane-wave density field in a fresh array with the shape of
# ``samps``.  Binding ``density_field`` directly to ``samps`` made the loop
# below overwrite the loaded samples in place, and ``samps`` is used again by
# later cells.
# NOTE(review): if the intent was to analyse ``samps`` itself rather than a
# plane wave, the loop should be removed instead -- confirm.
density_field = np.zeros(samps.shape, dtype=float)
for x in range(density_field.shape[0]):
    # every cell in row x takes the value of the wave at position x * dx
    density_field[x, :] = np.cos(2 * np.pi * x * dx / Lambda)

In [None]:
    # get overdensity field
delta = density_field/np.mean(density_field) - 1

In [None]:
    # get P(k) field: exploit the FFT of data that is purely real, not complex
delta_k = np.abs(np.fft.rfftn(delta).round())
Pk_field =  delta_k**2

In [None]:
    # get 3d array of index integer distances to k = (0, 0, 0)
# Squared integer index distance to k = 0 along a full-length FFT axis
# (indices past the midpoint wrap around) and along the half-length last
# axis that rfftn produces.
dist = np.minimum(np.arange(nc), np.arange(nc, 0, -1))
dist_z = np.arange(nc // 2 + 1)
dist *= dist
dist_z *= dist_z

# Build the grid with as many axes as the spectrum ``Pk_field`` has, so that
# it lines up element-for-element with ``Pk_field.ravel()``.  A hard-coded
# three-axis grid does not match a field loaded with np.genfromtxt, which has
# at most two axes.  For a three-axis spectrum the result is the same as
# dist[:, None, None] + dist[:, None] + dist_z.
# NOTE(review): assumes every axis of the field has ``nc`` cells -- confirm.
# The name ``dist_3d`` is kept because the following cell reads it.
sq_dist = dist_z
for _axis in range(Pk_field.ndim - 1):
    # prepend one full-length axis per remaining dimension
    sq_dist = dist[(slice(None),) + (None,) * sq_dist.ndim] + sq_dist
dist_3d = np.sqrt(sq_dist)

In [None]:
    # get unique distances and index which any distance stored in dist_3d 
    # will have in "distances" array
distances, shell_index = np.unique(dist_3d, return_inverse=True)
# np.bincount only accepts 1-D input, whereas NumPy 2 returns the inverse
# indices with the shape of ``dist_3d``; flattening is a no-op on versions
# that already return a flat array.
shell_index = np.ravel(shell_index)

# average the spectrum over all modes that share the same index distance
mode_counts = np.bincount(shell_index)
Pk = np.bincount(shell_index, weights=Pk_field.ravel()) / mode_counts

# compute "physical" values of k
dk = 2 * np.pi / boxlen
k = distances * dk

# plot results
fig = plt.figure(figsize=(9, 6))
ax1 = fig.add_subplot(111)
ax1.plot(k, Pk, label=r'$P(\mathbf{k})$')

In [None]:
img = samps

# Two-dimensional FFT with the zero-frequency term shifted to the centre,
# shown on a logarithmic scale.
f = np.fft.fft2(img)
fshift = np.fft.fftshift(f)
magnitude_spectrum = 20 * np.log(np.abs(fshift))

# Side-by-side panels: the input on the left, its spectrum on the right,
# both in grey scale and without axis ticks.
panels = (
    (121, img, 'Input Image'),
    (122, magnitude_spectrum, 'Magnitude Spectrum'),
)
for position, image, heading in panels:
    plt.subplot(position)
    plt.imshow(image, cmap='gray')
    plt.title(heading)
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
def compute_results(array, Nbins=20):
    """Estimate the two-point correlation of ``array`` in log-spaced bins.

    Parameters
    ----------
    array : array_like
        Data handed unchanged to ``two_point`` as its first argument.
    Nbins : int, optional
        Number of logarithmically spaced bin edges generated between
        1/50000 and 10 (default 20).

    Returns
    -------
    bins : ndarray
        The ``Nbins`` bin edges.
    results
        Whatever ``two_point(array, bins)`` returns.

    Notes
    -----
    The shapes of ``bins`` and of the result are printed as a quick check.
    """
    bins = np.logspace(np.log10(1. / 50000.), np.log10(10.), Nbins)
    print(np.shape(bins))
    results = two_point(array, bins)
    # Report the shape of the value just computed.  The previous code printed
    # the shape of ``corr``, a name that does not exist inside this function.
    print(np.shape(results))
    return (bins, results)
#

In [None]:
(bins, corr) = compute_results(samps)

In [None]:
bins

In [None]:
corr

In [None]:
# ``bins`` holds bin edges while ``corr`` holds one value per bin (one fewer
# entry), so plot against the bin centres rather than the edges themselves.
plt.plot(0.5 * (bins[1:] + bins[:-1]), corr)

In [None]:
bin_centers = 0.5 * (bins[1:] + bins[:-1])
#
# Build the NaN mask once, before ``corr`` is overwritten.  Recomputing it
# from the already-filtered ``corr`` yields an all-True mask of the wrong
# length, which would select the wrong entries of ``corr_err``.
finite = ~np.isnan(corr)
bin_centers = bin_centers[finite]
corr = corr[finite]
# NOTE(review): ``corr_err`` is not assigned anywhere in the visible notebook,
# so this line raises NameError as written -- presumably it should come from a
# bootstrap error estimate; confirm before running.
corr_err = corr_err[finite]
#
# Export the TPCF data to a numpy array with one row per bin and the columns
# (bin centre, correlation, error)
tpcf = np.transpose(np.array([bin_centers, corr, corr_err]))
#
# Print the TPCF file
# print(tpcf)
#
# Export the TPCF data to a file
# np.savetxt('./Data/ngc1566_astroml_tpcf.dat', tpcf)
#
# Import fitting process
from scipy.optimize import curve_fit
#
# Define the fitting function
def fitfunc(x, a, b, c):
    """Power-law model ``a * (x / b) ** c`` used for the curve fit.

    Parameters
    ----------
    x : float or ndarray
        Point(s) at which the model is evaluated.
    a : float
        Amplitude.
    b : float
        Scale by which ``x`` is divided.
    c : float
        Exponent applied to ``x / b``.
    """
    ratio = x / b
    return a * ratio ** c
#
# Plot the Landy-Szalay TPCF and its fit
popt, pcov = curve_fit(fitfunc, bin_centers[1:10], corr[1:10])
#

still working on this. . . 