### Parallel Python

In [1]:
import numpy
import matplotlib.pyplot
import pandas
import glob
import emcee

import eztao
import eztao.ts

import celerite

import pp

In [2]:
################################
# Define CARMA function for DRW
################################

################################
# Define the prior and log-probability functions for MCMC
################################

# prior function for tau_perturb
def lnprior_perturb(theta):
    """Prior on perturbation timescale. Note: this is a wedge like prior."""

    # determine DHO timescales
    log10_tau_perturb = (theta[-1] - theta[-2])/numpy.log(10)
    if -3 <= log10_tau_perturb <= 5:
        prior = 0
    else:
        prior = -(numpy.abs(log10_tau_perturb - 1) - 4)

    return prior

def lnprior_bounds(theta):
    """Prior on AR and MA parameters. This is a flat prior."""

    # Place some bounds on the parameter space
    bounds_low = numpy.array([-15, -15, -20, -20])
    bounds_high = numpy.array([15, 15, 10, 10])

    log_a1, log_a2, log_b0, log_b1 = theta
    if ( 
        bounds_low[0] < log_a1 < bounds_high[0] 
        and bounds_low[1] < log_a2 < bounds_high[1] 
        and bounds_low[2] < log_b0 < bounds_high[2] 
        and bounds_low[3] < log_b1 < bounds_high[3] 
       ):
        return 0.0
    return -numpy.inf

# We'll use the eztao version which effectively returns "gp.log_likelihood" from the GP and np.inf otherwise
def lnlike(theta, y, gp):
    return -eztao.ts.neg_param_ll(theta, y, gp)

def lnprob(theta, y, gp):
    lp_bounds = lnprior_bounds(theta)
    lp_perturb = lnprior_perturb(theta)                              
    if not numpy.isfinite(lp_bounds):
        return -numpy.inf
    return lp_bounds + lp_perturb + lnlike(theta, y, gp)

################################
# Define other functions
################################

# chi-sqared
def chisqg(y_data, y_model, sd=None):
    chisq = numpy.nansum(((y_data-y_model)/sd)**2)
    return chisq

In [3]:
def getCARMAstats(file):
    ################################
    # setup
    ################################

    #file_name = file[5:-4]
    #file_name = file[5:-8]
    file_name = file[22:-8]
    
    # read-in light curve
    df = pandas.read_csv(file)

    # obtain values from df
    ra = df['ra'].values[0]
    dec = df['dec'].values[0]
    t = df['mjd'].values
    y_real = df['mag'].values
    yerr_real = df['magerr'].values
    lc_length = len(t)
    
    # invert the magnitudes
    y_real_inverted = (min(y_real)-y_real)

    # normalize to unit standard deviation and zero mean
    y = (y_real_inverted - numpy.mean(y_real_inverted))/numpy.std(y_real_inverted)
    yerr = yerr_real/numpy.std(y_real_inverted)
        
    
    ################################
    ################################
    #
    # DRW Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(0.01, 10.0), (0.01, 10.0)]
    best_drw = eztao.ts.drw_fit(t, y, yerr, user_bounds=bounds)
    
    def get_carma_parameter(log_tau, log_amp):
        """Get DRW parameters in CARMA notation (alpha_*/beta_*).

        alpha_1 = -1 / tau
        sigma^2 = tau * sigma_kbs^2 / 2
        sigma_kbs = np.sqrt( 2 * sigma^2 / tau )
        beta_0 = sigma_kbs

        Returns:
            [alpha_1, beta_0].
        """
        return [-1.0 / numpy.exp(log_tau), numpy.sqrt( 2.0 * numpy.exp(log_amp)**2.0 / numpy.exp(log_tau))]
    
    # get best-fit in CARMA space
    best_drw_arma = numpy.exp(get_carma_parameter(best_drw[0], best_drw[1]))
    
    
    ################################
    ################################
    #
    # DHO Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(-15, 15), (-15, 15), (-20, 10), (-20, 10)]
    best_dho = eztao.ts.dho_fit(t, y, yerr, user_bounds=bounds)

    # Create the GP model -- instead of creating a "model" function that is then called by the "lnlike" function from tutorial,
    #  we will create a GP that will be passed as an argument to the MCMC sampler. This will be the "gp" that is passed to
    #  the "lnprob" and "param_ll" functions
    dho_kernel = eztao.carma.DHO_term(*numpy.log(best_dho))
    dho_gp = celerite.GP(dho_kernel, mean=numpy.median(y))
    dho_gp.compute(t, yerr)

    ################################
    # MCMC
    ################################

    # Initalize MCMC
    data = (t, y, yerr)
    nwalkers = 128
    niter = 2048

    initial = numpy.array(numpy.log(best_dho))
    ndim = len(initial)
    p0 = [numpy.array(initial) + 1e-7 * numpy.random.randn(ndim) for i in range(nwalkers)]

    # Create the MCMC sampler -- note that the GP is passed as an argument in addition to the data
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[y, dho_gp])

    # run a burn-in surrounding the best-fit parameters obtained above
    p0, lp, _ = sampler.run_mcmc(p0, 200)
    sampler.reset()

    # clear up the stored chain from burn-in, rerun the MCMC
    pos, prob, state = sampler.run_mcmc(p0, niter);

    ################################
    # Obtain the Best Fit: theta_max
    ################################

    # put all the samples that explored in MCMC into a single array
    samples = sampler.flatchain
    
    # find the parameters that have the best fit 
    theta_max_index = numpy.argmax(sampler.flatlnprobability)
    theta_max_probability = sampler.flatlnprobability[theta_max_index]
   
    theta_max  = samples[theta_max_index] # these are in log-space
    theta_max_norm = numpy.exp(theta_max) # take the exponent to get into 'normal' space
    
    
    ################################
    ################################
    #
    # Simulate and Return
    #
    ################################
    ################################
    
    ################################
    # Simulate and plot light curves
    ################################
    
    # create simulated light curve
    drw_sim_t, drw_sim_y, drw_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, best_drw_arma, 1, t)
    dho_sim_t, dho_sim_y, dho_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, theta_max_norm, 2, t)
    
    # directory to save plots to
    plot_dir = 'plots-and-figures/carma_plots'
    # plot drw
    plot = True    
    if plot:
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(drw_sim_t, drw_sim_y, label=f'drw {best_drw}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_drw_fit.png')
        matplotlib.pyplot.close()

        # plot dho
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(dho_sim_t, dho_sim_y, label=f'dho {theta_max_norm}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_dho_fit.png')
        matplotlib.pyplot.close()
    
    ################################
    # Determine best fit
    ################################
    
    # get chi-squared from sim light curves
    chisq_drw = chisqg(y, drw_sim_y, yerr)
    chisq_dho = chisqg(y, dho_sim_y, yerr)
    
    # determine best fit
    best_fit = 'DRW'
    if chisq_drw > chisq_dho and not numpy.isinf(chisq_dho):
        best_fit = 'DHO'
    
    ################################
    # Return
    ################################
    
    return file_name, ra, dec, t, y_real, yerr_real, best_drw, best_drw_arma, chisq_drw, best_dho, theta_max_norm, theta_max_probability, chisq_dho, best_fit, lc_length

In [4]:
ppservers = ()

# creates jobserver with ncpus workers
ncpus = 24
job_server = pp.Server(ncpus, ppservers=ppservers)

print("Starting pp with", job_server.get_ncpus(), "workers")

# get list of data files
#repository = glob.glob('data/*.csv')
repository = glob.glob('../../AGN_LightCurves/*.parquet')

# intialize lists to save to
file_names = []
times = []
magnitudes = []
mag_errors = []
ras = []
decs =[]
best_fit_drws = []
best_fit_drws_arma = []
best_fit_dhos = []
best_mcmc_dhos = []
dho_probabilities = []
chi_squared_drw = []
chi_squared_dho = []
best_fits = []
lc_lengths = []

# Submit a list of jobs running getCARMAstats for each file in repository
# getCARMAstats - the function
# (file,) - file with AGN lc
# (chisqg, ...) - tuple with functions on which getCARMAstats depends
# ("numpy", ...) - tuple with package dependencies to be imported
jobs = [(file, job_server.submit(getCARMAstats ,(file,), 
                                 (chisqg, lnprior_perturb, lnprior_bounds, lnlike, lnprob,), 
                                 ("numpy", "matplotlib.pyplot", "pandas", "emcee", "eztao", "eztao.ts",
                                  "celerite"))) for file in repository]

job_num = 1
for file, job in jobs:
    # start job
    file_name, ra, dec, t, y, yerr, best_drw, best_drw_arma, chisq_drw, best_dho, best_mcmc_dho, dho_probability, chisq_dho, best_fit, lc_length = job()
        
    # save data from job
    file_names.append(file_name)
    ras.append(ra)
    decs.append(dec)
    times.append(t)
    magnitudes.append(y)
    mag_errors.append(yerr)
    best_fit_drws.append(best_drw)
    best_fit_drws_arma.append(best_drw_arma)
    chi_squared_drw.append(chisq_drw)
    best_fit_dhos.append(best_dho)
    best_mcmc_dhos.append(best_mcmc_dho)
    dho_probabilities.append(dho_probability)
    chi_squared_dho.append(chisq_dho)
    best_fits.append(best_fit)
    lc_lengths.append(lc_length)
    
    #print(f'Completed [{job_num}/{len(jobs)}]: {file_name}')
    job_num += 1

job_server.print_stats()

Starting pp with 24 workers
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
  v

In [5]:
agn_fit_data = pandas.DataFrame({'Filenames': file_names, 'RA': ras, 'DEC': decs, 'Times (MJD)': times, 
                                 'Magnitudes': magnitudes, 'Mag Errors': mag_errors, 
                                 'Best DRW Fit': best_fit_drws, 'Best DRW ARMA Fit': best_fit_drws_arma, 'DRW_chi_sq': chi_squared_drw,
                                 'Best DHO Fit': best_fit_dhos, 'DHO MCMC Fit': best_mcmc_dhos, 'DHO MCMC Probability': dho_probabilities, 'DHO_chi_sq': chi_squared_dho,
                                 'Best Fit': best_fits, 'LC Length': lc_lengths})

# save dataframe
agn_fit_data.to_csv('agn_fit_data.csv')
agn_fit_data

Unnamed: 0,Filenames,RA,DEC,Times (MJD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,DRW_chi_sq,Best DHO Fit,DHO MCMC Fit,DHO MCMC Probability,DHO_chi_sq,Best Fit,LC Length
0,0.2032_-7.1532_ztfg,0.203240,-7.153253,"[58283.4428935, 58286.4428704, 58295.4461921, ...","[17.516969699999997, 17.4566288, 17.5868243999...","[0.0247562788, 0.0239279121, 0.0257828757, 0.0...","[1.010050167084168, 1.39835956503435]","[0.6947517383596952, 31.66765493572173]",1.004287e+00,"[0.8052989860660189, 349.8675370514253, 0.0092...","[1.0572923542024664, 3833.5761429196955, 0.071...",-242.424975,39.761051,DRW,183
1,0.8643_27.6548_ztfg,0.864381,27.654870,"[58263.4847338, 58271.4756597, 58274.4529861, ...","[18.0273285, 17.6561546, 17.992786399999996, 1...","[0.0269577354, 0.0219427999, 0.0264183898, 0.0...","[1.2333921531936225, 1.010050167084168]","[0.7472893972361795, 8.131813354533962]",1.655330e+02,"[1065.8194928399669, 71.454568468059, 170.1112...","[1085.6174986961441, 79.64783078925991, 183.30...",-537.939248,6.372386,DHO,390
2,1.0082_70.3217_ztfg,1.008609,70.321808,"[58252.467963, 58252.4684838, 58255.4317361, 5...","[20.3061562, 20.148098, 19.9521751, 20.8079872...","[0.129920244, 0.141349345, 0.124755882, 0.1965...","[1.0500751699132727, 2.7588872966218947]","[0.7047504959175069, 541839.8279942377]",2.174904e-16,"[2.2824730178059918, 0.01944285573258857, 0.22...","[2.197298193466844, 0.018488669671634036, 0.21...",-417.572100,114.615329,DRW,306
3,1.5813_20.2029_ztfg,1.581381,20.202983,"[58268.4661921, 58277.4619213, 58280.447419, 5...","[14.7221251, 14.7285233, 14.7703943, 14.734273...","[0.0106572807, 0.0106579475, 0.0106633035, 0.0...","[1.1950735625854267, 53.87493704606068]","[0.7388342392539888, inf]",0.000000e+00,"[18.000567446715387, 0.18199376168304757, 3.18...","[17.718655503234242, 0.17777448886989983, 3.14...",-105.614453,117.806097,DRW,366
4,10.72_-23.541_ztfg,10.720075,-23.540989,"[58330.4594676, 58333.4387037, 58336.4817477, ...","[15.3464823, 15.387213699999998, 15.394475, 15...","[0.0196861867, 0.0197297633, 0.0197379421, 0.0...","[1.1488679567370212, 2.240470898973126]","[0.7283340917143439, 1777.2559826028898]",7.175138e-09,"[5.365771177051134, 0.12296559891771136, 0.995...","[5.408971161663187, 0.11988837336657512, 0.993...",-171.116993,16.399755,DRW,133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,90.5437_28.4721_ztfg,90.543631,28.472096,"[58204.1298495, 58204.17239580001, 58205.12875...","[15.1023655, 15.1037445, 15.0927429, 15.091063...","[0.0117217041999999, 0.0117235724, 0.011708719...","[1.010050167084168, 599.9128236219965]","[0.6947517383596952, inf]",0.000000e+00,"[0.17662257025289357, 0.0005817146649354154, 0...","[0.17424749202147324, 0.0005529663959402024, 0...",151.769019,131.463417,DRW,225
383,90.6581_65.3712_ztfg,90.657663,65.371379,"[58204.19048609999, 58207.247662, 58207.256643...","[15.5720406, 15.8300734, 15.726823800000002, 1...","[0.013086074, 0.0136499358, 0.0134064704, 0.01...","[3.404449233263794, 1.010050167084168]","[0.9673207773069852, 2.029511128240606]",7.210180e+02,"[504.8845997814039, 7.682873098642266, 30.9095...","[512.7874337392082, 7.81298609943269, 31.31190...",-401.589489,4.588137,DHO,298
384,91.454_-27.9112_ztfg,91.454290,-27.910989,"[58205.1373843, 58386.5221412, 58423.4174653, ...","[17.092571300000003, 17.559841199999997, 17.23...","[0.0260783136, 0.0326710902, 0.0278091952, 0.0...","[1.0573037240968683, 63.689950700017015]","[0.706528872130536, inf]",0.000000e+00,"[9.247739106326593, 0.03617814549956453, 0.798...","[12.580259403676884, 0.04970343900332802, 1.08...",-29.749131,15.502418,DRW,92
385,93.9015_71.0375_ztfg,93.901677,71.037576,"[58204.1684606, 58204.1900116, 58207.2527199, ...","[14.4023151, 14.4292612, 14.5862532, 14.610807...","[0.0121623082, 0.0121789305, 0.0122852838, 0.0...","[1.0527374918338386, 1.010050167084168]","[0.7054064571219747, 9.913561193975553]",3.019359e+01,"[155.80213110357056, 3.1810177238781865, 11.13...","[4.282044551062953, 312764.2068114378, 0.01817...",-408.440094,15.380605,DHO,306


### Combine Dataframes

In [1]:
import numpy as np
import pandas as pd

In [2]:
# read data frame
agn_fit_data = pd.read_csv("agn_fit_data.csv")

# read in properties data frame
agn_properties = pd.read_csv("BAT_AGN_BASS_Data.csv")

In [4]:
def round_colemn(df, key, decimals=3):
    df[key] = np.around(df[key].tolist(), decimals=decimals)
    
    # note: return statement is redundant since Python is pass-by-reference
    # but return is generally good practice
    return df

In [5]:
decimal_places = 1

# round fit RA and DEC
agn_fit_data = round_colemn(agn_fit_data, 'RA', decimal_places)
agn_fit_data = round_colemn(agn_fit_data, 'DEC', decimal_places)

# round properties RA and DEC
agn_properties = round_colemn(agn_properties, 'RA', decimal_places)
agn_properties = round_colemn(agn_properties, 'DEC', decimal_places)

In [6]:
# merge dataframes
agn_dataframe = pd.merge(agn_fit_data, agn_properties, how='left', on=['RA', 'DEC'], validate='one_to_one')

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save dataframe
agn_dataframe.to_csv('agn_dataframe.csv')

### Timescales

In [None]:
def dho_timescales(params):
    """Compute a couple DHO timescales from CARMA parameters.

    - damping factor
    - decay timescale
    - rise/damped QPO timescale
    - perturbation timescale
    - decorrelation timescale
    - natural oscillation frequency
    """
   
    # expand params
    a1, a2, b0, b1 = params  

    # damping factor & natural frequency
    xi = a1/(2*np.sqrt(a2))
    omega_0 = np.sqrt(a2)   

    # placeholder for two timescales
    tau_perturb = b1/b0
    tau_decay = 0
    tau_rise_dqpo = 0
    tau_decorr = 0

    roots = np.roots([1, a1, a2])
    if xi < 1:
        tau_decay = np.abs(1/roots[0].real)
        tau_rise_dqpo = 2*np.pi*np.abs(1/roots[0].imag)/np.sqrt(1 - xi**2)
        tau_decorr = (np.pi/2)*np.pi*2/omega_0
    else:
        tau_decay = np.abs(1/np.max(roots.real))
        tau_rise_dqpo = np.abs(1/np.min(roots.real))
        tau_decorr = (tau_decay + tau_rise_dqpo)*np.pi/2
 
    return np.array([xi, tau_decay, tau_rise_dqpo, tau_perturb, tau_decorr, omega_0])