In [1]:
%matplotlib inline

import numpy as np 
import scipy
import matplotlib.pyplot as plt

# add the gridding path 
# import sys
# sys.path.append("/home/ian/Research/Disks/MillionPoints/million-points-of-light")
# sys.path.append("/Users/ianczekala/Documents/ALMA/million-points-of-light")

# import gridding

import theano
import theano.tensor as tt
from theano.tensor import fft

import pymc3 as pm

# convert from arcseconds to radians
arcsec = np.pi / (180.0 * 3600) # [radians]  = 1/206265 radian/arcsec

In [2]:
def sky_plane(alpha, dec, a=1, delta_alpha=1.0*arcsec, delta_delta=1.0*arcsec, sigma_alpha=1.0*arcsec,
              sigma_delta=1.0*arcsec, Omega=0.0):
    '''
    alpha: ra (in radians)
    delta: dec (in radians)
    a : amplitude
    delta_alpha : offset (in radians)
    delta_dec : offset (in radians)
    sigma_alpha : width (in radians)
    sigma_dec : width (in radians)
    Omega : position angle of ascending node (in degrees east of north)
    '''

    return a * np.exp(-( (alpha - delta_alpha)**2/(2 * sigma_alpha**2) + \
                        (dec - delta_delta)**2/(2 * sigma_delta**2)))


def fourier_plane(u, v, a=1, delta_alpha=1.0*arcsec, delta_delta=1.0*arcsec, sigma_alpha=1.0*arcsec,
              sigma_delta=1.0*arcsec, Omega=0.0):
    '''
    Calculate the Fourier transform of the Gaussian. Assumes u, v in kλ.
    '''

    # convert back to lambda
    u = u * 1e3
    v = v * 1e3

    return 2 * np.pi * a * sigma_alpha * sigma_delta * np.exp(- 2 * np.pi**2 * \
                (sigma_alpha**2 * u**2 + sigma_delta**2 * v**2) - 2 * np.pi * 1.0j * \
                                                    (delta_alpha * u + delta_delta * v))


# the gradients
def dV_ddelta_alpha(u, v, a=1, delta_alpha=1.0*arcsec, delta_delta=1.0*arcsec, sigma_alpha=1.0*arcsec,
              sigma_delta=1.0*arcsec, Omega=0.0):
    
    
    return -2 * np.pi * 1j * u * fourier_plane(u*1e-3, v*1e-3, a, delta_alpha, delta_delta, sigma_alpha,
              sigma_delta, Omega)


def dV_ddelta_delta(u, v, a=1, delta_alpha=1.0*arcsec, delta_delta=1.0*arcsec, sigma_alpha=1.0*arcsec,
              sigma_delta=1.0*arcsec, Omega=0.0):
    
    
    return -2 * np.pi * 1j * v * fourier_plane(u*1e-3, v*1e-3, a, delta_alpha, delta_delta, sigma_alpha,
              sigma_delta, Omega)


In [3]:
def fftspace(width, N):
    '''Oftentimes it is necessary to get a symmetric coordinate array that spans ``N``
     elements from `-width` to `+width`, but makes sure that the middle point lands
     on ``0``. The indices go from ``0`` to ``N -1.``
     `linspace` returns  the end points inclusive, wheras we want to leave out the
     right endpoint, because we are sampling the function in a cyclic manner.'''

    assert N % 2 == 0, "N must be even."

    dx = width * 2.0 / N
    xx = np.empty(N, np.float)
    for i in range(N):
        xx[i] = -width + i * dx
    
    return xx

In [4]:
# Let's plot this up and see what it looks like 

N_alpha = 128
N_dec = 128
img_radius = 15.0 * arcsec


# full span of the image
ra = fftspace(img_radius, N_alpha) # [arcsec]
dec = fftspace(img_radius, N_dec) # [arcsec]

In [5]:
# calculate the maximum u and v points that our image grid can sample 
dRA = (2 * img_radius) / N_alpha # radians
max_baseline = 1 / (2 * dRA) * 1e-3 # kilolambda, nyquist rate
print(max_baseline) # kilolambda

440.03158666047227


In [6]:
# load the fake data

N_vis = 100 # number of data points 

data_points = np.load("data_points.npy")
print(data_points.shape)
u_data, v_data = data_points.T

data_real = np.load("data_real.npy")
data_imag = np.load("data_imag.npy")
print(data_real.shape, data_imag.shape)

noise = np.load("noise.npy")
print(noise.shape)

(100, 2)
(100,) (100,)
(100,)


In [7]:
# create fixed quantities that we can pre-calculate in numpy before stuffing into the Theano part 

# the image plane grid (fixed throughout the problem)
XX, YY = np.meshgrid(np.fft.fftshift(ra), np.fft.fftshift(dec))

# the image-plane taper (fixed throughout problem)
corrfun_mat = np.load("corrfun.npy")

# the u and v coordinates of the RFFT output (also fixed throughout problem)
u_coords = np.fft.rfftfreq(N_alpha, d=(2 * img_radius)/N_alpha) * 1e-3  # convert to [kλ]
v_coords = np.fft.fftfreq(N_dec, d=(2 * img_radius)/N_dec) * 1e-3  # convert to [kλ]

# load the C_real and C_imag matrices (stored as dense arrays in numpy save files)
# these are scipy csc sparse matrices that will be stuffed into Theano objects
C_real = scipy.sparse.csc_matrix(np.load("C_real.npy"))
C_imag = scipy.sparse.csc_matrix(np.load("C_imag.npy"))

In [8]:
C_real

<100x8320 sparse matrix of type '<class 'numpy.float64'>'
	with 3543 stored elements in Compressed Sparse Column format>

In [9]:
C_imag

<100x8320 sparse matrix of type '<class 'numpy.float64'>'
	with 3543 stored elements in Compressed Sparse Column format>

# Sampling in PyMC3 

In [10]:
with pm.Model() as model:

    # the image-plane grid over which the sky model will be evaluated
    # NOTE that these must be `fftshifted` already.
    # add an extra dimension for the later packing into the rfft
    alpha = theano.shared(XX[np.newaxis,:])
    dalpha = abs(alpha[0,0,1] - alpha[0,0,0])
    delta = theano.shared(YY[np.newaxis,:])
    ddelta = abs(delta[0,1,0] - delta[0,0,0])
    
    # Define the PyMC3 model parameters, which are just for the image plane model
    a = pm.Uniform("a", lower=0.0, upper=10.0)
    delta_alpha = pm.Uniform("delta_alpha", lower=-1*arcsec, upper=2*arcsec)
    delta_delta = pm.Uniform("delta_delta", lower=-1*arcsec, upper=2*arcsec)
    sigma_alpha = pm.Uniform("sigma_alpha", lower=0.5*arcsec, upper=1.5*arcsec)
    sigma_delta = pm.Uniform("sigma_delta", lower=0.5*arcsec, upper=1.5*arcsec)
    
    # Calculate the sky-plane model
    I = a * tt.exp(-(alpha - delta_alpha)**2/(2 * sigma_alpha**2) - (delta - delta_delta)**2/(2 * sigma_delta**2))
    # since the input coordinates were already shifted, then this is too
    # I shape should be (1, N_dec, N_alpha)

    # taper the image with the gridding correction function
    # this should broadcast OK, since the trailing image dimensions match
    corrfun = theano.shared(corrfun_mat)
    I_tapered = I * corrfun

    # use the FFT to transform the image sky model to the Fourier plane
    # output from the RFFT is (1, N_delta, N_alpha//2 + 1, 2)
    rfft = dalpha * ddelta * fft.rfft(I_tapered, norm=None)  

    # Store the interpolation matrices as theano shared variables, make sure it's sparse
    C_real_sparse = theano.sparse.CSC(C_real.data, C_real.indices, C_real.indptr, C_real.shape)
    C_imag_sparse = theano.sparse.CSC(C_imag.data, C_imag.indices, C_imag.indptr, C_imag.shape)

    # flatten the RFFT output appropriately for the interpolation, taking the real and imag parts separately
    vis_real = rfft[0, :, :, 0].flatten() # real values 
    vis_imag = rfft[0, :, :, 1].flatten() # imaginary values

    # interpolate the RFFT to the baselines by a sparse matrix multiply
    interp_real = theano.sparse.dot(C_real_sparse, vis_real)
    interp_imag = theano.sparse.dot(C_imag_sparse, vis_imag)
    
    shape_print = tt.printing.Print('interp_real')(interp_real.shape)
    
    # condition on the real and imaginary observations
#     pm.Normal("obs_real", mu=interp_real, sd=noise, observed=data_real)
#     pm.Normal("obs_imag", mu=interp_imag, sd=noise, observed=data_imag)
    pm.Normal("obs_real", mu=interp_real, sd=noise, observed=data_real, shape=(100, 1))
#     pm.Normal("obs_imag", mu=interp_imag, sd=noise, observed=data_imag)

interp_real __str__ = [100]


In [11]:
model.basic_RVs

[a_interval__,
 delta_alpha_interval__,
 delta_delta_interval__,
 sigma_alpha_interval__,
 sigma_delta_interval__,
 obs_real]

In [12]:
model.unobserved_RVs

[a_interval__,
 delta_alpha_interval__,
 delta_delta_interval__,
 sigma_alpha_interval__,
 sigma_delta_interval__,
 a,
 delta_alpha,
 delta_delta,
 sigma_alpha,
 sigma_delta]

In [13]:
model.free_RVs

[a_interval__,
 delta_alpha_interval__,
 delta_delta_interval__,
 sigma_alpha_interval__,
 sigma_delta_interval__]

In [39]:
with model:
    s = pm.Metropolis(vars=model.free_RVs)
    trace = pm.sample(100, tune=100, step=s, cores=1)

Only 100 samples in chain.
Sequential sampling (2 chains in 1 job)
CompoundStep
>Metropolis: [sigma_delta]
>Metropolis: [sigma_alpha]
>Metropolis: [delta_delta]
>Metropolis: [delta_alpha]
>Metropolis: [a]
100%|██████████| 200/200 [00:01<00:00, 111.17it/s]
100%|██████████| 200/200 [00:01<00:00, 111.67it/s]
The gelman-rubin statistic is larger than 1.4 for some parameters. The sampler did not converge.
The number of effective samples is smaller than 10% for some parameters.


In [13]:
pm.summary(trace)

Unnamed: 0,mean,sd,mc_error,hpd_2.5,hpd_97.5,n_eff,Rhat
a,1.004894,0.03679826,0.003675938,0.968276,1.045015,1.011401,14.19686
delta_alpha,5e-06,4.955509e-08,4.93688e-09,5e-06,5e-06,,1.705685
delta_delta,5e-06,1.082184e-08,1.082184e-09,5e-06,5e-06,,25424280000000.0
sigma_alpha,5e-06,1.200049e-07,1.20003e-08,5e-06,5e-06,1.006177,34.69772
sigma_delta,5e-06,1.087083e-07,1.087019e-08,5e-06,5e-06,,4.260857


In [14]:
with model:
    trace = pm.sample(draws=1000, tune=1000, chains=1)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...


ValueError: shapes (100,) and (8320,) not aligned: 100 (dim 0) != 8320 (dim 0)