#### Imports

In [1]:
import numpy
import pandas
import glob
import emcee

import eztao
import eztao.ts

import celerite

import pp

#### Define functions used in CARMA process

In [2]:
# chi-squared
def chisqg(y_data, y_model, sd=None):
    """Return the chi-squared statistic between data and a model.

    Parameters
    ----------
    y_data : array-like
        Observed values.
    y_model : array-like
        Model values, evaluated at the same points as ``y_data``.
    sd : scalar or array-like, optional
        Uncertainty on each data point. When omitted (``None``) the residuals
        are left unweighted, i.e. every point has unit uncertainty.

    Returns
    -------
    float
        Sum of the squared (weighted) residuals. NaN terms are skipped
        because the sum is taken with ``numpy.nansum``.
    """
    residuals = y_data - y_model
    # The previous implementation divided by ``sd`` unconditionally, so the
    # documented default of ``None`` raised a TypeError.
    if sd is not None:
        residuals = residuals / sd
    chisq = numpy.nansum(residuals**2)
    return chisq

################################
# Define the prior and log-probability functions for MCMC
################################

# prior function for tau_perturb
def lnprior_perturb(theta):
    """Wedge-shaped log-prior on the perturbation timescale.

    The timescale is taken as the difference between the last two entries of
    ``theta`` (natural-log parameters), converted to base 10. The prior is
    flat (0) while that value lies in [-3, 5] and falls off linearly with the
    distance outside that interval.
    """
    # difference of the last two log-parameters, rescaled from ln to log10
    # (presumably log10(b1 / b0) given how lnprior_bounds unpacks theta)
    ln_to_log10 = numpy.log(10)
    log10_tau = (theta[-1] - theta[-2]) / ln_to_log10

    # flat part of the wedge
    if -3 <= log10_tau <= 5:
        return 0

    # linear penalty: distance from the centre (1) beyond the half-width (4)
    overshoot = numpy.abs(log10_tau - 1) - 4
    return -overshoot

def lnprior_bounds(theta):
    """Flat (box) log-prior on the four log-space AR and MA parameters.

    Returns 0.0 when every parameter lies strictly inside its interval and
    -inf as soon as one of them does not.
    """
    log_a1, log_a2, log_b0, log_b1 = theta

    # (value, lower limit, upper limit) -- both limits are exclusive
    limits = (
        (log_a1, -15, 15),
        (log_a2, -15, 15),
        (log_b0, -20, 10),
        (log_b1, -20, 10),
    )
    for value, lower, upper in limits:
        if not (lower < value < upper):
            return -numpy.inf
    return 0.0

# Log-likelihood wrapper around the eztao helper.
def lnlike(theta, y, gp):
    """Return the log-likelihood as the negation of ``eztao.ts.neg_param_ll``.

    Per the original author's note, the eztao helper effectively wraps
    ``gp.log_likelihood``; a failed evaluation is expected to come back as
    -inf after the sign flip here (NOTE(review): confirm against eztao).
    """
    negative_log_like = eztao.ts.neg_param_ll(theta, y, gp)
    return -negative_log_like

def lnprob(theta, y, gp):
    """Log-posterior: box prior + perturbation prior + log-likelihood.

    Returns -inf without evaluating the likelihood whenever the box prior
    rejects ``theta``.
    """
    box_term = lnprior_bounds(theta)
    wedge_term = lnprior_perturb(theta)

    if numpy.isfinite(box_term):
        return box_term + wedge_term + lnlike(theta, y, gp)
    return -numpy.inf

#### CARMA Processes

In [3]:
def getCARMAstats(file):
    """Fit DRW and DHO CARMA models to one light curve and compare them.

    Parameters
    ----------
    file : str
        Path to a light-curve table readable by ``pandas.read_csv`` that
        provides the columns ``ra``, ``dec``, ``mjd``, ``mag`` and ``magerr``.

    Returns
    -------
    tuple
        ``(file_name, ra, dec, t, y_real, yerr_real, best_drw, chisq_drw,
        theta_max_norm, theta_max_probability, chisq_dho, best_fit,
        lc_length)`` where

        * ``file_name`` is the file's base name without its extension,
        * ``ra`` / ``dec`` are taken from the first row of the table,
        * ``t``, ``y_real``, ``yerr_real`` are the raw time, magnitude and
          magnitude-error arrays,
        * ``best_drw`` is the output of ``eztao.ts.drw_fit`` on the raw data,
        * ``theta_max_norm`` is the exponentiated highest-probability DHO
          sample from the MCMC and ``theta_max_probability`` its log-probability,
        * ``chisq_drw`` / ``chisq_dho`` are chi-squared values of the predicted
          light curves (DRW against the raw data, DHO against the normalized data),
        * ``best_fit`` is ``'DHO'`` when the DHO chi-squared is smaller and
          finite, otherwise ``'DRW'``,
        * ``lc_length`` is the number of epochs.
    """
    # Imported locally so the function remains self-contained when it is
    # shipped to a Parallel Python worker.
    import os

    ################################
    # setup
    ################################

    # Light-curve identifier: base name without extension. This matches the
    # former file[5:-8] slice for 'data/<name>.parquet' paths but no longer
    # depends on the directory name or the extension length.
    file_name = os.path.splitext(os.path.basename(file))[0]

    # read-in light curve
    # NOTE(review): the driver globs '*.parquet' while this uses read_csv --
    # confirm the actual on-disk format of the input files.
    df = pandas.read_csv(file)

    # obtain values from df
    ra = df['ra'].values[0]
    dec = df['dec'].values[0]
    t = df['mjd'].values
    y_real = df['mag'].values
    yerr_real = df['magerr'].values
    lc_length = len(t)

    # invert the magnitudes
    y_real_inverted = (min(y_real)-y_real)

    # normalize to unit standard deviation and zero mean
    # NOTE(review): a constant light curve gives a zero standard deviation here.
    y_scale = numpy.std(y_real_inverted)
    y = (y_real_inverted - numpy.mean(y_real_inverted))/y_scale
    yerr = yerr_real/y_scale

    ################################
    ################################
    #
    # DRW Process
    #
    ################################
    ################################

    # obtain best-fit (on the raw, un-normalized magnitudes)
    best_drw = eztao.ts.drw_fit(t, y_real, yerr_real)

    # define celerite GP model
    drw_gp = celerite.GP(eztao.carma.DRW_term(*numpy.log(best_drw)), mean=numpy.median(y_real))
    drw_gp.compute(t, yerr_real)

    # define log prob function
    def param_ll(*args):
        return -eztao.ts.neg_param_ll(*args)

    # initialize the walker, specify number of walkers, prob function, args and etc.
    initial = numpy.array(numpy.log(best_drw))
    ndim, nwalkers = len(initial), 32
    sampler_drw = emcee.EnsembleSampler(nwalkers, ndim, param_ll, args=[y_real, drw_gp])

    # run a burn-in surrounding the best-fit parameters obtained above
    p0 = initial + 1e-8 * numpy.random.randn(nwalkers, ndim)
    p0, _, _ = sampler_drw.run_mcmc(p0, 500)

    # clear up the stored chain from burn-in, rerun the MCMC
    sampler_drw.reset()
    sampler_drw.run_mcmc(p0, 2000)

    # remove points with low prob for the sake of making good corner plot
    # NOTE(review): the cleaned DRW chain is neither returned nor used below;
    # the DRW result that is returned is the drw_fit output, not an MCMC estimate.
    prob_threshold_drw = numpy.percentile(sampler_drw.flatlnprobability, 3)
    clean_chain_drw = sampler_drw.flatchain[sampler_drw.flatlnprobability > prob_threshold_drw, :]

    ################################
    ################################
    #
    # DHO Process
    #
    ################################
    ################################

    # obtain best-fit (on the normalized light curve)
    bounds = [(-15, 15), (-15, 15), (-20, 10), (-20, 10)]
    best_dho = eztao.ts.dho_fit(t, y, yerr, user_bounds=bounds)

    # Create the GP model -- instead of creating a "model" function that is then called by the "lnlike" function from tutorial,
    #  we will create a GP that will be passed as an argument to the MCMC sampler. This will be the "gp" that is passed to
    #  the "lnprob" and "param_ll" functions
    # (A second, identical GP that was built here but never used has been removed.)
    kernel = eztao.carma.DHO_term(*numpy.log(best_dho))
    gp = celerite.GP(kernel, mean=numpy.median(y))
    gp.compute(t, yerr)

    ################################
    # MCMC
    ################################

    # Initialize MCMC
    nwalkers = 128
    niter = 2048

    initial = numpy.array(numpy.log(best_dho))
    ndim = len(initial)
    # one slightly perturbed copy of the best-fit vector per walker
    p0 = [initial + 1e-7 * numpy.random.randn(ndim) for _ in range(nwalkers)]

    # Create the MCMC sampler -- note that the GP is passed as an argument in addition to the data
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[y, gp])

    # run a burn-in surrounding the best-fit parameters obtained above
    p0, _, _ = sampler.run_mcmc(p0, 200)

    # clear up the stored chain from burn-in, then run the production MCMC
    sampler.reset()
    sampler.run_mcmc(p0, niter)

    ################################
    # Obtain the Best Fit: theta_max
    ################################

    # put all the samples that explored in MCMC into a single array
    samples = sampler.flatchain

    # find the parameters that have the best fit
    theta_max_index = numpy.argmax(sampler.flatlnprobability)
    theta_max_probability = sampler.flatlnprobability[theta_max_index]

    theta_max = samples[theta_max_index] # these are in log-space
    theta_max_norm = numpy.exp(theta_max) # take the exponent to get into 'normal' space

    ################################
    ################################
    #
    # Return
    #
    ################################
    ################################

    # create simulated light curve (only the predicted values are used)
    _, drw_sim_y, _ = eztao.ts.carma_sim.pred_lc(t, y_real, yerr_real, best_drw, 1, t)
    _, dho_sim_y, _ = eztao.ts.carma_sim.pred_lc(t, y, yerr, theta_max_norm, 2, t)

    # get chi-squared from sim light curves
    chisq_drw = chisqg(y_real, drw_sim_y, yerr_real)
    chisq_dho = chisqg(y, dho_sim_y, yerr)

    # determine best fit: DRW unless DHO has a strictly smaller, finite chi-squared
    best_fit = 'DRW'
    if chisq_drw > chisq_dho and not numpy.isinf(chisq_dho):
        best_fit = 'DHO'

    return file_name, ra, dec, t, y_real, yerr_real, best_drw, chisq_drw, theta_max_norm, theta_max_probability, chisq_dho, best_fit, lc_length

#### Parallel Python

In [4]:
# Empty tuple: no additional pp servers are named for the job server.
ppservers = ()

# Create the job server with ncpus workers.
ncpus = 4
job_server = pp.Server(ncpus, ppservers=ppservers)

print("Starting pp with", job_server.get_ncpus(), "workers")

# Collect the light-curve files to process
# (an earlier revision globbed 'data/*.csv' instead).
repository = glob.glob('data/*.parquet')

# Accumulators, one list per quantity returned by getCARMAstats.
file_names, times, magnitudes, mag_errors = [], [], [], []
ras, decs = [], []
best_fit_drws, best_fit_dhos = [], []
dho_probabilities = []
chi_squared_drw, chi_squared_dho = [], []
best_fits, lc_lengths = [], []

# Submit one getCARMAstats job per file in the repository.
#   getCARMAstats       - the function to run
#   (file,)             - its argument tuple: the AGN light-curve file
#   dependent_functions - functions getCARMAstats depends on
#   required_modules    - packages to be imported for the job
dependent_functions = (chisqg, lnprior_perturb, lnprior_bounds, lnlike, lnprob,)
required_modules = ("numpy", "pandas", "emcee", "eztao", "eztao.ts",
                    "celerite")
jobs = []
for file in repository:
    submitted_job = job_server.submit(getCARMAstats, (file,),
                                      dependent_functions,
                                      required_modules)
    jobs.append((file, submitted_job))

# Destination lists, in the same order as the tuple returned by getCARMAstats.
result_columns = (file_names, ras, decs, times, magnitudes, mag_errors,
                  best_fit_drws, chi_squared_drw, best_fit_dhos,
                  dho_probabilities, chi_squared_dho, best_fits, lc_lengths)

job_num = 1
for file, job in jobs:
    # Calling the job object yields the job's return value.
    result = job()
    (file_name, ra, dec, t, y, yerr, best_drw, chisq_drw, best_dho,
     dho_probability, chisq_dho, best_fit, lc_length) = result

    # Save the data from this job, one value per accumulator.
    for column, value in zip(result_columns, result):
        column.append(value)

    print(f'Completed [{job_num}/{len(jobs)}]: {file_name}')
    job_num += 1

job_server.print_stats()

Starting pp with 4 workers
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [1/20]: 103.0511_74.4271_ztfg
Completed [2/20]: 116.038_29.2474_ztfg
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
 Completed [3/20]: 133.9072_78.2231_ztfg
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [4/20]: 134.877_44.9139_ztfg
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [5/20]: 169.9084_51.5543_ztfg
Completed [6/20]: 171.4007_54.3825_ztfg
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
  return (np.exp(2 * log_amp), 1 / np.exp(log_tau))
 Completed [7/20]: 2.2982_-0.6152_ztfg
Completed [8/20]: 209.1529_-19.5292_ztfg
Completed [9

#### Create the dataframe

In [5]:
# Column label -> collected values, listed in the order the columns should appear.
fit_columns = [
    ('Filenames', file_names),
    ('RA', ras),
    ('DEC', decs),
    ('Times (MJD)', times),
    ('Magnitudes', magnitudes),
    ('Mag Errors', mag_errors),
    ('Best DRW Fit', best_fit_drws),
    ('DRW_chi_sq', chi_squared_drw),
    ('Best DHO Fit', best_fit_dhos),
    ('DHO Probability', dho_probabilities),
    ('DHO_chi_sq', chi_squared_dho),
    ('Best Fit', best_fits),
    ('LC Length', lc_lengths),
]

# One row per processed light curve.
agn_fit_data = pandas.DataFrame(dict(fit_columns))

agn_fit_data

Unnamed: 0,Filenames,RA,DEC,Times (MJD),Magnitudes,Mag Errors,Best DRW Fit,DRW_chi_sq,Best DHO Fit,DHO Probability,DHO_chi_sq,Best Fit,LC Length
0,103.0511_74.4271_ztfg,103.05135,74.426998,"[58207.2522106, 58207.2740046, 58210.2521065, ...","[14.6737709, 14.6875458, 14.7260599, 14.720105...","[0.0143315587, 0.0143392924, 0.014361714, 0.01...","[0.3118341191278398, 75.32839023917496]",2.447756e-06,"[204.96596286344527, 0.3601051673924065, 14.44...",119.717754,inf,DRW,451
1,116.038_29.2474_ztfg,116.03796,29.247434,"[58204.2889005, 58204.2929514, 58207.1921875, ...","[15.6531496, 15.6059399, 15.995362300000002, 1...","[0.0138543714, 0.0138009964, 0.0144265769, 0.0...","[0.08470319667271456, 0.01179177848717246]",2874.174,"[108.38073747134965, 12.989898960439872, 23.95...",-365.905793,27.217088,DHO,281
2,133.9072_78.2231_ztfg,133.908055,78.223319,"[58749.4978472, 58749.4982986, 58767.4454745, ...","[14.000515900000002, 13.967572199999998, 13.86...","[0.0121124275, 0.0121269217, 0.0121778008, 0.0...","[0.09814216826646766, 0.01666198722019497]",77.57388,"[0.08317344720349124, 547331.555305584, 0.1876...",-25.814471,1.911609,DHO,23
3,134.877_44.9139_ztfg,134.876887,44.913984,"[58202.2585417, 58205.2550463, 58208.2916088, ...","[15.819725, 15.8804407, 15.8719711, 15.6451683...","[0.0119376825, 0.012046949, 0.0120311351, 0.01...","[0.10281449585322677, 0.0025446997645182383]",20024.89,"[13837.042920623278, 16574.374898660204, 17778...",-481.956968,inf,DRW,363
4,169.9084_51.5543_ztfg,169.908433,51.554355,"[58202.2801389, 58205.2693403, 58205.2913773, ...","[18.1211014, 18.0685997, 18.0709877, 18.141630...","[0.0327100791, 0.0316537693, 0.0317007303, 0.0...","[0.3044626621288398, 413.7291340389482]",6.169936e-09,"[0.07022879677446268, 0.00045429256421610496, ...",241.864482,293.202468,DRW,461
5,171.4007_54.3825_ztfg,171.400662,54.3826,"[58202.2801389, 58205.2693403, 58205.2913773, ...","[16.3706856, 16.4121857, 16.4012318, 16.383750...","[0.0183746237, 0.0185461659, 0.0184996165, 0.0...","[0.1795428622980684, 20.96959369536443]",0.0003510955,"[175.89231697144, 4.816474498258094, 39.816942...",-130.835567,inf,DRW,391
6,2.2982_-0.6152_ztfg,2.298253,-0.615179,"[58280.4460301, 58283.4433449, 58286.4433218, ...","[18.1298199, 18.0916977, 18.052021, 18.0613518...","[0.0340682529, 0.0331933089, 0.032314159, 0.03...","[0.11155301376483356, 0.002446850343402199]",2399.306,"[744.8695140481784, 18.22441992690164, 100.379...",-239.491422,inf,DRW,177
7,209.1529_-19.5292_ztfg,209.152892,-19.529115,"[58204.2976273, 58204.3425463, 58207.3003819, ...","[16.146141099999998, 16.1509819, 16.3127480000...","[0.0159391984, 0.0159571394, 0.016604675, 0.01...","[0.19476452512871864, 63.20242217295605]",9.803847e-12,"[1.771657026654485, 0.017436623279834293, 0.27...",-55.026482,6.333394,DRW,88
8,21.8856_19.1788_ztfg,21.88547,19.178891,"[58291.4494444, 58300.4637847, 58303.4548148, ...","[15.6015663, 15.5417223, 15.5534048, 15.529402...","[-0.0110834995, -0.0102506354, -0.010417207099...","[0.09602531753221355, 0.009694813936178527]",5541.1,"[231.9193459697326, 8.10007350734636, 44.67915...",-458.007632,19.369683,DHO,409
9,215.3742_47.7902_ztfg,215.373959,47.790139,"[58206.30848380001, 58206.3090856, 58234.25807...","[15.3493433, 15.4037571, 15.4523144, 15.436457...","[0.0122431618999999, 0.0144965276, 0.012418657...","[0.1692233355716742, 216.8664038525919]",7.332924e-08,"[2087.3398426438334, 8.78317858831009, 507.576...",-32.844583,128.942261,DRW,207
