# CARMA & MCMC process on Forced Photometry of AGN

## Table of Contents
### Chapter 1: Solving Problems
> #### 1.1 How to select specific LC from FP dataframe
> #### 1.2 How to Identify/Remove nan indices in FP LCs
> #### 1.3 Exclude LCs that don't meet the minimum length
### Chapter 2: Parallel Python

- - -
- - -

## Chapter 1: Solving Problems
This chapter addresses the individual problems encountered when converting the PP code to work with FP LCs.

### 1.1 How to select specific LC from FP dataframe
(i.e. sort via filter & field)

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Selection criteria: the filter band and the field of the wanted light curve.
filename = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
filter_type = 'g'
field = '389'

# Load the forced-photometry table, then keep only the rows whose filter
# and field both match the selection above (one combined boolean mask).
fp_dataframe = pd.read_parquet(filename)
matches_selection = (fp_dataframe['filter'] == filter_type) & (fp_dataframe['field'] == field)
df = fp_dataframe[matches_selection]
df
#print(df['RA'].to_numpy()[0])
#print(df['JD'].to_numpy()[0])

Unnamed: 0,RA,DEC,JD,mag,magerr,filter,field
1045,317.2915437,-9.6707397,"[2458259.9212153, 2458263.9602315, 2458267.938...","[18.2467226735013, nan, nan, nan, nan, nan, 16...","[0.0477328813841479, -0.0566556623768301, -0.5...",g,389


### 1.2 How to Identify/Remove nan indices in FP LCs
Masking with np.where

In [None]:
# Keep only the epochs whose magnitude lies strictly between 10 and 30.
# NaN magnitudes fail both comparisons, so they are dropped as well.
mag_in_range = (row['mag'] > 10) & (row['mag'] < 30)
good_ind = np.where(mag_in_range)[0]

# Apply the same index selection to the time, magnitude and error arrays.
t, y, e = (row[column][good_ind] for column in ('JD', 'mag', 'magerr'))

### 1.3 Exclude LCs that don't meet the minimum length

In [1]:
import numpy
import pandas

In [9]:
# Load the forced-photometry table.
file = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
df = pandas.read_parquet(file)

# Build one key per light curve observed in the chosen filter.
filter_type = 'g'
keys = []
for index, row in df.iterrows():
    # skip rows observed in any other filter
    if row['filter'] != filter_type:
        continue

    min_length = 2

    # count only epochs with 10 < mag < 30 (NaN magnitudes fail the comparison)
    mag = row['mag']
    good_ind = numpy.where((mag > 10) & (mag < 30))[0]
    t = row['JD'][good_ind]

    # skip light curves that are too short after masking
    if len(t) < min_length:
        continue
    keys.append([file, row['RA'], row['DEC'], filter_type, row['field'], min_length])

# stop here if no light curve survived the selection
if not keys:
    raise ValueError(f'No keys were found: keys = {keys}')

253
185


- - -
- - -

## Chapter 2: Parallel Python

#### Imports

In [1]:
import numpy
import matplotlib.pyplot
import pandas
import glob
import emcee

import eztao
import eztao.ts

import celerite

import pp

#### Dependent Functions

In [2]:
################################
# Define CARMA function for DRW
################################

def get_carma_parameter(tau, amp):
    """Convert DRW (tau, amp) into CARMA-notation coefficients.

    The conversion implemented is:

        alpha_1 = -1 / tau
        beta_0  = sqrt(2 * amp^2 / tau)

    which follows from sigma^2 = tau * sigma_kbs^2 / 2 with
    sigma = amp and beta_0 = sigma_kbs.

    Args:
        tau: DRW timescale.
        amp: DRW amplitude (sigma in the relation above).

    Returns:
        list: [alpha_1, beta_0].
    """
    alpha_1 = -1.0 / tau
    beta_0 = numpy.sqrt(2.0 * amp**2.0 / tau)
    return [alpha_1, beta_0]

################################
# Define the prior and log-probability functions for MCMC
################################

# prior function for tau_perturb
def lnprior_perturb(theta):
    """Wedge-shaped log-prior on the perturbation timescale.

    The quantity tested is (theta[-1] - theta[-2]) / ln(10), i.e. the
    difference of the last two parameters converted to base 10.  The prior
    is flat (0) while that value lies in [-3, 5] and decreases linearly with
    the distance from that interval outside of it.

    Args:
        theta: parameter vector; only its last two entries are read.

    Returns:
        0 inside the flat region, otherwise a negative log-prior value.
    """
    log10_tau_perturb = (theta[-1] - theta[-2]) / numpy.log(10)

    # flat top of the wedge
    if -3 <= log10_tau_perturb <= 5:
        return 0

    # [-3, 5] is centred on 1 with half-width 4, so this is minus the
    # distance from the edge of the flat region
    return 4 - numpy.abs(log10_tau_perturb - 1)

def lnprior_bounds(theta):
    """Flat log-prior on the four AR and MA parameters.

    Args:
        theta: exactly four values (log_a1, log_a2, log_b0, log_b1).

    Returns:
        0.0 when every parameter lies strictly inside its allowed interval,
        -inf otherwise.
    """
    log_a1, log_a2, log_b0, log_b1 = theta

    # (lower, upper) limits, in the same order as the parameters
    limits = ((-15, 15), (-15, 15), (-20, 10), (-20, 10))
    values = (log_a1, log_a2, log_b0, log_b1)

    # reject as soon as one parameter falls on or outside its limits
    for value, (low, high) in zip(values, limits):
        if not low < value < high:
            return -numpy.inf
    return 0.0

# We'll use the eztao version which effectively returns "gp.log_likelihood" from the GP and np.inf otherwise
def lnlike(theta, y, gp):
    """Return the log-likelihood as the negative of eztao's neg_param_ll.

    Args:
        theta: parameter vector forwarded to eztao.ts.neg_param_ll.
        y: data values forwarded to eztao.ts.neg_param_ll.
        gp: GP object forwarded to eztao.ts.neg_param_ll.
    """
    negative_log_likelihood = eztao.ts.neg_param_ll(theta, y, gp)
    return -negative_log_likelihood

def lnprob(theta, y, gp):
    """Log-posterior: both log-priors plus the log-likelihood.

    Returns -inf without evaluating the likelihood when the bounds prior
    rejects theta.
    """
    bounds_term = lnprior_bounds(theta)
    perturb_term = lnprior_perturb(theta)

    # only evaluate the likelihood for parameters inside the bounds
    if numpy.isfinite(bounds_term):
        return bounds_term + perturb_term + lnlike(theta, y, gp)
    return -numpy.inf

################################
# Define other functions
################################

# chi-squared
def chisqg(y_data, y_model, sd=None):
    """Return the chi-squared statistic between data and model.

    Args:
        y_data: observed values (array-like).
        y_model: model values, broadcastable against y_data.
        sd: standard deviation(s) of the data, scalar or array-like.  When
            None the residuals are left unweighted (equivalent to sd = 1).

    Returns:
        Sum of the squared (optionally sd-weighted) residuals; NaN terms are
        ignored by the sum.
    """
    residuals = numpy.asarray(y_data, dtype=float) - numpy.asarray(y_model, dtype=float)

    # the default sd=None used to raise a TypeError on the division
    if sd is not None:
        residuals = residuals / numpy.asarray(sd, dtype=float)

    return numpy.nansum(residuals**2)

#### CARMA Process

In [3]:
# Pass string list key where:
# - key[0] = dataframe filename + location
# - key[1] = RA
# - key[2] = DEC
# - key[3] = filter (i.e r, g, b, i) 
# - key[4] = field
# - key[5] = minimum LC length
def getCARMAstats(key):
    """Fit DRW and DHO models to one forced-photometry light curve.

    The light curve is read from the parquet file, masked to magnitudes
    strictly between 10 and 30, inverted and normalised, fitted with
    eztao's DRW and DHO fitters, refined for the DHO case with an emcee
    MCMC run, plotted, and finally compared through chi-squared.

    Args:
        key: sequence of
            key[0] parquet file path,
            key[1] RA,
            key[2] DEC,
            key[3] filter,
            key[4] field,
            key[5] minimum light-curve length.

    Returns:
        tuple: (file_name, ra, dec, field, t, y_real, yerr_real, best_drw,
        best_drw_arma, chisq_drw, best_dho, theta_max_norm,
        theta_max_probability, chisq_dho, best_fit, lc_length).

    Raises:
        ValueError: if fewer than key[5] epochs remain after masking.
    """
    # local import: only needed to create the plot directory
    import os

    ################################
    # setup
    ################################

    # read-in FP df
    df = pandas.read_parquet(key[0])

    # grab the row in df that has the same position, filter and field
    df = df.loc[df['RA'] == key[1]]
    df = df.loc[df['DEC'] == key[2]]
    df = df.loc[df['filter'] == key[3]]
    df = df.loc[df['field'] == key[4]]

    # obtain values from df (first matching row)
    ra = df['RA'].to_numpy()[0]
    dec = df['DEC'].to_numpy()[0]
    t = df['JD'].to_numpy()[0]
    y_real = df['mag'].to_numpy()[0]
    yerr_real = df['magerr'].to_numpy()[0]

    # exclude indices with nan's or outliers
    good_ind = numpy.where((y_real > 10) & (y_real < 30))[0]
    t = t[good_ind]
    y_real = y_real[good_ind]
    yerr_real = yerr_real[good_ind]

    # generate filename of FP LC
    file_name = f'fp_lc_{ra}_{dec}_field_{key[4]}.csv'

    # Check the minimum length before any statistics are computed: an empty
    # light curve would otherwise fail inside min() with an unrelated
    # message, and an `assert` would be stripped when Python runs with -O.
    lc_length = len(t)
    if lc_length < key[5]:
        raise ValueError(f'{file_name} does not meet minimum length of {key[5]}: lc_length = {lc_length}')

    # invert the magnitudes
    y_real_inverted = (min(y_real)-y_real)

    # normalize to unit standard deviation and zero mean
    y = (y_real_inverted - numpy.mean(y_real_inverted))/numpy.std(y_real_inverted)
    yerr = yerr_real/numpy.std(y_real_inverted)


    ################################
    ################################
    #
    # DRW Process
    #
    ################################
    ################################

    # obtain best-fit
    bounds = [(0.01, 10.0), (0.01, 10.0)]
    best_drw = eztao.ts.drw_fit(t, y, yerr, user_bounds=bounds)

    # get best-fit in CARMA space
    # NOTE(review): get_carma_parameter is declared as (tau, amp); confirm
    # that best_drw[0] is tau and best_drw[1] is amp as returned by
    # eztao.ts.drw_fit, and that exponentiating the result is intended.
    best_drw_arma = numpy.exp(get_carma_parameter(best_drw[0], best_drw[1]))


    ################################
    ################################
    #
    # DHO Process
    #
    ################################
    ################################

    # obtain best-fit
    bounds = [(-15, 15), (-15, 15), (-20, 10), (-20, 10)]
    best_dho = eztao.ts.dho_fit(t, y, yerr, user_bounds=bounds)

    # Create the GP model -- instead of creating a "model" function that is then called by the "lnlike" function from tutorial,
    #  we will create a GP that will be passed as an argument to the MCMC sampler. This will be the "gp" that is passed to
    #  the "lnprob" and "param_ll" functions
    dho_kernel = eztao.carma.DHO_term(*numpy.log(best_dho))
    dho_gp = celerite.GP(dho_kernel, mean=numpy.median(y))
    dho_gp.compute(t, yerr)

    ################################
    # MCMC
    ################################

    # Initialize MCMC
    nwalkers = 128
    niter = 2048

    # walkers start in a tiny Gaussian ball around the log of the best fit
    initial = numpy.array(numpy.log(best_dho))
    ndim = len(initial)
    p0 = [numpy.array(initial) + 1e-7 * numpy.random.randn(ndim) for i in range(nwalkers)]

    # Create the MCMC sampler -- note that the GP is passed as an argument in addition to the data
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[y, dho_gp])

    # run a burn-in surrounding the best-fit parameters obtained above,
    # then clear the chain stored during the burn-in
    p0, _, _ = sampler.run_mcmc(p0, 200)
    sampler.reset()

    # rerun the MCMC from the burned-in walker positions
    sampler.run_mcmc(p0, niter)

    ################################
    # Obtain the Best Fit: theta_max
    ################################

    # put all the samples that explored in MCMC into a single array
    samples = sampler.flatchain

    # find the parameters that have the best fit
    theta_max_index = numpy.argmax(sampler.flatlnprobability)
    theta_max_probability = sampler.flatlnprobability[theta_max_index]

    theta_max  = samples[theta_max_index] # these are in log-space
    theta_max_norm = numpy.exp(theta_max) # take the exponent to get into 'normal' space


    ################################
    ################################
    #
    # Simulate and Return
    #
    ################################
    ################################

    ################################
    # Simulate and plot light curves
    ################################

    # create simulated light curve
    drw_sim_t, drw_sim_y, drw_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, best_drw_arma, 1, t)
    dho_sim_t, dho_sim_y, dho_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, theta_max_norm, 2, t)

    # directory to save plots to
    plot_dir = 'carma_plots'
    plot = True
    if plot:
        # savefig does not create missing directories; exist_ok tolerates
        # several workers creating the directory at the same time
        os.makedirs(plot_dir, exist_ok=True)

        # plot drw
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(drw_sim_t, drw_sim_y, label=f'drw {best_drw_arma}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_drw_fit.png')
        matplotlib.pyplot.close()

        # plot dho
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(dho_sim_t, dho_sim_y, label=f'dho {theta_max_norm}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_dho_fit.png')
        matplotlib.pyplot.close()

    ################################
    # Determine best fit
    ################################

    # get chi-squared from sim light curves
    chisq_drw = chisqg(y, drw_sim_y, yerr)
    chisq_dho = chisqg(y, dho_sim_y, yerr)

    # DRW is the default; DHO wins only with a strictly smaller, finite chi-squared
    best_fit = 'DRW'
    if chisq_drw > chisq_dho and not numpy.isinf(chisq_dho):
        best_fit = 'DHO'

    ################################
    # Return
    ################################

    return file_name, ra, dec, key[4], t, y_real, yerr_real, best_drw, best_drw_arma, chisq_drw, best_dho, theta_max_norm, theta_max_probability, chisq_dho, best_fit, lc_length

#### Initialization & Parallel Python

In [None]:
ppservers = ()

# creates jobserver with ncpus workers
ncpus = 24
job_server = pp.Server(ncpus, ppservers=ppservers)

print("Starting pp with", job_server.get_ncpus(), "workers")

# read-in FP df
file = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
df = pandas.read_parquet(file)

# create list of keys based on a given filter
filter_type = 'g'

# minimum allowed LC length (same for every row, so set once)
min_length = 10

keys = []
for index, row in df.iterrows():
    # skip rows that do not match the filter_type
    if row['filter'] != filter_type:
        continue

    # check if the LC meets the minimum length (excluding indices with outliers)
    good_ind = numpy.where((row['mag'] > 10) & (row['mag'] < 30))[0]
    t = row['JD'][good_ind]
    if len(t) >= min_length:
        keys.append([file, row['RA'], row['DEC'], filter_type, row['field'], min_length])

# raise an exception if no keys are generated
if not keys:
    raise ValueError(f'No keys were found: keys = {keys}')

# initialize lists to save to
file_names = []
ras = []
decs = []
fields = []
times = []
magnitudes = []
mag_errors = []
best_fit_drws = []
best_fit_drws_arma = []
best_fit_dhos = []
best_mcmc_dhos = []
dho_probabilities = []
chi_squared_drw = []
chi_squared_dho = []
best_fits = []
lc_lengths = []

# keys whose job returned no result
failed_keys = []

# Submit one getCARMAstats job per key
# getCARMAstats - the function
# (key,) - the function's single argument: [file, RA, DEC, filter, field, min_length]
# (get_carma_parameter, ...) - tuple with functions on which getCARMAstats depends
# ("numpy", ...) - tuple with package dependencies to be imported
jobs = [(key, job_server.submit(getCARMAstats, (key,),
                                 (get_carma_parameter, lnprior_perturb, lnprior_bounds, lnlike, lnprob, chisqg,),
                                 ("numpy", "matplotlib.pyplot", "pandas", "emcee", "eztao", "eztao.ts",
                                  "celerite"))) for key in keys]

# The loop variable is named `key` so that `file` keeps holding the parquet path.
for job_num, (key, job) in enumerate(jobs, start=1):
    # calling the job waits for the worker and returns its result
    result = job()

    # pp hands back None when the function raised inside the worker; record
    # the key and carry on so one bad light curve does not discard the rest
    if result is None:
        failed_keys.append(key)
        print(f'Failed [{job_num}/{len(jobs)}]: RA={key[1]}, DEC={key[2]}, field={key[4]}')
        continue

    file_name, ra, dec, field, t, y, yerr, best_drw, best_drw_arma, chisq_drw, best_dho, best_mcmc_dho, dho_probability, chisq_dho, best_fit, lc_length = result

    # save data from job
    file_names.append(file_name)
    ras.append(ra)
    decs.append(dec)
    fields.append(field)
    times.append(t)
    magnitudes.append(y)
    mag_errors.append(yerr)
    best_fit_drws.append(best_drw)
    best_fit_drws_arma.append(best_drw_arma)
    chi_squared_drw.append(chisq_drw)
    best_fit_dhos.append(best_dho)
    best_mcmc_dhos.append(best_mcmc_dho)
    dho_probabilities.append(dho_probability)
    chi_squared_dho.append(chisq_dho)
    best_fits.append(best_fit)
    lc_lengths.append(lc_length)

    print(f'Completed [{job_num}/{len(jobs)}]: {file_name}')

job_server.print_stats()

#### Create and Save Dataframe with Results

In [5]:
# One column per quantity collected from the jobs, in display order.
fit_columns = {
    'Filenames': file_names,
    'RA': ras,
    'DEC': decs,
    'field': fields,
    'Times (JD)': times,
    'Magnitudes': magnitudes,
    'Mag Errors': mag_errors,
    'Best DRW Fit': best_fit_drws,
    'Best DRW ARMA Fit': best_fit_drws_arma,
    'DRW chisq': chi_squared_drw,
    'Best DHO Fit': best_fit_dhos,
    'DHO MCMC Fit': best_mcmc_dhos,
    'DHO MCMC Probability': dho_probabilities,
    'DHO chisq': chi_squared_dho,
    'Best Fit': best_fits,
    'LC Length': lc_lengths,
}
agn_fit_data = pandas.DataFrame(fit_columns)

# write the results to disk, then display them
agn_fit_data.to_parquet('ANG_FP_g_FitData.parquet')
agn_fit_data

Unnamed: 0,Filenames,RA,DEC,field,Times (JD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,DRW chisq,Best DHO Fit,DHO MCMC Fit,DHO MCMC Probability,DHO chisq,Best Fit,LC Length
0,fp_lc_0.20323455_-7.1532089_field_395.csv,0.20323455,-7.1532089,395,"[2458283.9428935, 2458318.918287, 2458322.9398...","[22.075839486275363, 21.24175510232971, 23.134...","[0.9112273425746468, 0.3438406481866066, 1.602...","[1.010050167084168, 462.71394825572787]","[0.3715581744238082, 5.951416634004988e+282]",0.000000e+00,"[0.004369020709044826, 1125.9617284680944, 0.0...","[0.022383066414079045, 573407.5764188938, 25.9...",-56.366370,12.456208,DRW,42
1,fp_lc_0.8642925999999999_27.654793_field_1645.csv,0.8642925999999999,27.654793,1645,"[2458314.9841204, 2458314.9845718, 2458337.903...","[21.776049719004373, 22.69602698749315, 27.083...","[0.4399298523147267, 1.1095079097321627, 43.46...","[1.010050167084168, 27.10926243648254]","[0.3715581744238082, 3.690316082315483e+16]",9.226263e-24,"[0.009439438528840598, 1006.9500487712055, 4.9...","[0.008981866743041536, 1006.9901252952062, 7.3...",-52.041678,8.467355,DRW,39
2,fp_lc_0.8642925999999999_27.654793_field_600.csv,0.8642925999999999,27.654793,600,"[2458263.9847338, 2458289.9331134, 2458295.950...","[23.81327311535296, 22.15362923336867, 22.3145...","[12.396324456863823, 0.5551943587070858, 1.398...","[1.010050167084168, 2.72817389590233]","[0.3715581744238082, 46.477929660070956]",7.387107e-01,"[2.57672343809326, 0.00016315923536505336, 0.0...","[2.7059257131587873, 0.00022721912988140527, 0...",-476.174173,107.202775,DRW,333
3,fp_lc_1.0082763_70.3217215_field_1880.csv,1.0082763,70.3217215,1880,"[2458387.7687616, 2458390.9042245, 2458443.747...","[22.514904553587147, 23.036754506803423, 22.70...","[1.8071456097826883, 3.0001561743848435, 2.814...","[1.010050167084168, 22026.465794806718]","[0.3715581744238082, inf]",0.000000e+00,"[3.059023205018258e-07, 0.0523881466579054, 1....","[6.944528333472568e-07, 0.05242428700298561, 1...",-51.653434,6.754478,DRW,31
4,fp_lc_1.0082763_70.3217215_field_833.csv,1.0082763,70.3217215,833,"[2458246.9853009, 2458252.967963, 2458258.9859...","[21.56465887440168, 22.33818025318398, 23.7592...","[0.6526396260172993, 0.8128131225566738, 7.155...","[1.010050167084168, 1858.4863062400623]","[0.3715581744238082, inf]",0.000000e+00,"[0.026008617693305747, 3.059023205018258e-07, ...","[0.021395585705204667, 3.1823259476193795e-07,...",-465.097609,118.576383,DRW,290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
509,fp_lc_90.658166_65.3713197_field_1845.csv,90.658166,65.3713197,1845,"[2458747.0154861, 2458747.0159375, 2458749.987...","[21.01370656765534, 20.29993978770297, 20.8846...","[0.354977227605443, 0.1838833746256927, 0.2794...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",1.136832e+01,"[0.0033328069858809665, 1608.4004186159357, 0....","[0.003741420703211402, 1608.410308647447, 1.66...",-31.469185,21.397923,DRW,26
510,fp_lc_90.658166_65.3713197_field_812.csv,90.658166,65.3713197,812,"[2458210.7471412, 2458227.6813194, 2458231.703...","[18.431935844236552, 21.339635020904225, 21.45...","[0.090329115198733, 0.5138602848328698, 0.8363...","[1.010050167084168, 34.93058497504824]","[0.3715581744238082, 2.2225162253362457e+21]",1.716969e-24,"[465.15388672961416, 0.22557692474601224, 5.16...","[498.988149220759, 0.27017298627422526, 5.7213...",-227.266949,12.316446,DRW,221
511,fp_lc_93.9014798_71.03749785_field_838.csv,93.9014798,71.03749785,838,"[2458237.6971065, 2458341.0046065, 2458346.987...","[19.97050336003658, 17.993713800570976, 19.023...","[0.3954985249581671, 0.0688176983721745, 0.059...","[2.1474293189146665, 1.010050167084168]","[0.62771246957919, 2.6505357916837275]",1.705648e+02,"[388.0271665984434, 52.115686599714266, 91.200...","[378.5076481932543, 52.79821768113118, 91.9993...",-296.611050,inf,DRW,237
512,fp_lc_98.19657765_63.6736939_field_1845.csv,98.19657765,63.6736939,1845,"[2458384.9114699, 2458427.8838426, 2458430.837...","[22.969026888219453, 19.99405330758357, 19.101...","[3.136789926126838, 0.1451415501118745, 0.0424...","[1.010050167084168, 27.49165090199506]","[0.3715581744238082, 6.32047220460054e+16]",1.009566e-23,"[125.35788224025931, 0.7978870880868593, 7.211...","[0.03353402348331843, 153061.89633031242, 6.49...",-10.585205,11.898146,DRW,30


- - -
- - -