# CARMA & MCMC process on Forced Photometry of AGN

## Table of Contents

### Introduction and Imports

### Chapter 1: Solving Problems

> #### 1.1 How to select specific LC from FP dataframe

> #### 1.2 How to Identify/Remove nan indices in FP LCs

> #### 1.3 Exclude LC's that don't meet minimum length

### Chapter 2: Parallel Python

> #### 2.1 Parallel Python

> #### 2.2 Best Fit Validation

### Chapter 3: Data Migration

> #### 3.1 Merge BASS AGN Properties

> #### 3.2 Class and Type

> #### 3.3 SNR

### Chapter 4: DHO Timescales

> #### 4.1 Calculate Timescales

> #### 4.2 Timescale Damping

> #### 4.3 "Bad Fits" Based on Timescales

### Appendix

> #### A: Functions

- - -
- - -

## Introduction and Imports

### Filter Type

Specifies the filter used from the forced photometry data. 

**This line is required in order to run most cells in this workbook.** The filter type is used when accessing, saving, and parsing dataframes and files

In [1]:
filter_type = 'r'

 - - -
 - - -

## Chapter 1: Solving Problems

This chapter is about solving the individual problems faced when converting the PP code to working with FP LC

### 1.1 How to select specific LC from FP dataframe
(i.e. sort via filter & field)

In [2]:
import numpy as np
import pandas as pd

In [2]:
filename = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
fp_dataframe = pd.read_parquet(filename)

field = '389'
# filter_type = 'g'

df = fp_dataframe.loc[fp_dataframe['filter'] == filter_type]
df = df.loc[df['field'] == field]
df
#print(df['RA'].to_numpy()[0])
#print(df['JD'].to_numpy()[0])

Unnamed: 0,RA,DEC,JD,mag,magerr,filter,field
1045,317.2915437,-9.6707397,"[2458259.9212153, 2458263.9602315, 2458267.938...","[18.2467226735013, nan, nan, nan, nan, nan, 16...","[0.0477328813841479, -0.0566556623768301, -0.5...",g,389


### 1.2 How to Identify/Remove nan indices in FP LCs

Masking with np.where

In [None]:
good_ind = np.where((row['mag'] > 10) & (row['mag'] < 30))[0]

t = row['JD'][good_ind]
y = row['mag'][good_ind]
e = row['magerr'][good_ind]

### 1.3 Exclude LC's that don't meet minimum length

In [1]:
import numpy
import pandas

In [9]:
# read-in FP df
file = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
df = pandas.read_parquet(file)

# create list of keys based on a given filter
filter_type = 'g'
keys = []
for index, row in df.iterrows():
    # check if the row matches the filter_type
    if row['filter'] == filter_type:
        min_length = 2
        
        # check if the LC meets the minimum length (excluding indices with outliers)
        good_ind = numpy.where((row['mag'] > 10) & (row['mag'] < 30))[0]
        t = row['JD'][good_ind]
        if len(t) >= min_length:
            keys.append([file, row['RA'], row['DEC'], filter_type, row['field'], min_length])

# raise runtime exception if no keys are generated
if len(keys) == 0:
    raise ValueError(f'No keys were found: keys = {keys}')

253
185


- - -
- - -

## Chapter 2: Parallel Python

### 2.1 Parallel Python

#### Imports

In [2]:
import numpy
import matplotlib.pyplot
import pandas
import glob
import emcee

import eztao
import eztao.ts

import celerite

import pp

#### Dependent Functions

In [3]:
################################
# Define CARMA function for DRW
################################

def get_carma_parameter(tau, amp):
    """Get DRW parameters in CARMA notation (alpha_*/beta_*).

    alpha_1 = -1 / tau
    sigma^2 = tau * sigma_kbs^2 / 2
    sigma_kbs = np.sqrt( 2 * sigma^2 / tau )
    beta_0 = sigma_kbs

    Returns:
        [alpha_1, beta_0].
    """
    return [-1.0 / tau, numpy.sqrt( 2.0 * amp**2.0 / tau)]

################################
# Define the prior and log-probability functions for MCMC
################################

# prior function for tau_perturb
def lnprior_perturb(theta):
    """Prior on perturbation timescale. Note: this is a wedge like prior."""

    # determine DHO timescales
    log10_tau_perturb = (theta[-1] - theta[-2])/numpy.log(10)
    if -3 <= log10_tau_perturb <= 5:
        prior = 0
    else:
        prior = -(numpy.abs(log10_tau_perturb - 1) - 4)

    return prior

def lnprior_bounds(theta):
    """Prior on AR and MA parameters. This is a flat prior."""

    # Place some bounds on the parameter space
    bounds_low = numpy.array([-15, -15, -20, -20])
    bounds_high = numpy.array([15, 15, 10, 10])

    log_a1, log_a2, log_b0, log_b1 = theta
    if ( 
        bounds_low[0] < log_a1 < bounds_high[0] 
        and bounds_low[1] < log_a2 < bounds_high[1] 
        and bounds_low[2] < log_b0 < bounds_high[2] 
        and bounds_low[3] < log_b1 < bounds_high[3] 
       ):
        return 0.0
    return -numpy.inf

# We'll use the eztao version which effectively returns "gp.log_likelihood" from the GP and np.inf otherwise
def lnlike(theta, y, gp):
    return -eztao.ts.neg_param_ll(theta, y, gp)

def lnprob(theta, y, gp):
    lp_bounds = lnprior_bounds(theta)
    lp_perturb = lnprior_perturb(theta)                              
    if not numpy.isfinite(lp_bounds):
        return -numpy.inf
    return lp_bounds + lp_perturb + lnlike(theta, y, gp)

################################
# Define other functions
################################

# chi-sqared
def chisqg(y_data, y_model, sd=None):
    chisq = numpy.nansum(((y_data-y_model)/sd)**2)
    return chisq

#### CARMA Process

In [4]:
# Pass string list key where:
# - key[0] = dataframe filename + location
# - key[1] = RA
# - key[2] = DEC
# - key[3] = filter (i.e r, g, b, i) 
# - key[4] = field
# - key[5] = minimum LC length
def getCARMAstats(key):
    ################################
    # setup
    ################################
    
    # read-in FP df
    df = pandas.read_parquet(key[0])
    
    # grab row in df that has the same filter and field
    df = df.loc[df['RA'] == key[1]]
    df = df.loc[df['DEC'] == key[2]]
    df = df.loc[df['filter'] == key[3]]
    df = df.loc[df['field'] == key[4]]
    
    # obtain values from df
    ra = df['RA'].to_numpy()[0]
    dec = df['DEC'].to_numpy()[0]
    t = df['JD'].to_numpy()[0]
    y_real = df['mag'].to_numpy()[0]
    yerr_real = df['magerr'].to_numpy()[0]
    
    # exclude indicies with nan's or outliers
    good_ind = numpy.where((y_real > 10) & (y_real < 30))[0]
    t = t[good_ind]
    y_real = y_real[good_ind]
    yerr_real = yerr_real[good_ind]
    
    # invert the magnitudes
    y_real_inverted = (min(y_real)-y_real)

    # normalize to unit standard deviation and zero mean
    y = (y_real_inverted - numpy.mean(y_real_inverted))/numpy.std(y_real_inverted)
    yerr = yerr_real/numpy.std(y_real_inverted)
    
    # generate filename of FP LC
    file_name = f'fp_lc_{ra}_{dec}_field_{key[4]}.csv'
        
    # assert LC meets minimum length
    lc_length = len(t)
    assert lc_length >= key[5], (f'{file_name} does not meet minimum length of {key[5]}: lc_length = {lc_length}')
    
    
    ################################
    ################################
    #
    # DRW Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(0.01, 10.0), (0.01, 10.0)]
    best_drw = eztao.ts.drw_fit(t, y, yerr, user_bounds=bounds)
    
    # get best-fit in CARMA space
    best_drw_arma = numpy.exp(get_carma_parameter(best_drw[0], best_drw[1]))
    
    
    ################################
    ################################
    #
    # DHO Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(-15, 15), (-15, 15), (-20, 10), (-20, 10)]
    best_dho = eztao.ts.dho_fit(t, y, yerr, user_bounds=bounds)

    # Create the GP model -- instead of creating a "model" function that is then called by the "lnlike" function from tutorial,
    #  we will create a GP that will be passed as an argument to the MCMC sampler. This will be the "gp" that is passed to
    #  the "lnprob" and "param_ll" functions
    dho_kernel = eztao.carma.DHO_term(*numpy.log(best_dho))
    dho_gp = celerite.GP(dho_kernel, mean=numpy.median(y))
    dho_gp.compute(t, yerr)

    ################################
    # MCMC
    ################################

    # Initalize MCMC
    data = (t, y, yerr)
    nwalkers = 128
    niter = 2048

    initial = numpy.array(numpy.log(best_dho))
    ndim = len(initial)
    p0 = [numpy.array(initial) + 1e-7 * numpy.random.randn(ndim) for i in range(nwalkers)]

    # Create the MCMC sampler -- note that the GP is passed as an argument in addition to the data
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[y, dho_gp])

    # run a burn-in surrounding the best-fit parameters obtained above
    p0, lp, _ = sampler.run_mcmc(p0, 200)
    sampler.reset()

    # clear up the stored chain from burn-in, rerun the MCMC
    pos, prob, state = sampler.run_mcmc(p0, niter);

    ################################
    # Obtain the Best Fit: theta_max
    ################################

    # put all the samples that explored in MCMC into a single array
    samples = sampler.flatchain
    
    # find the parameters that have the best fit 
    theta_max_index = numpy.argmax(sampler.flatlnprobability)
    theta_max_probability = sampler.flatlnprobability[theta_max_index]
   
    theta_max  = samples[theta_max_index] # these are in log-space
    theta_max_norm = numpy.exp(theta_max) # take the exponent to get into 'normal' space
    
    
    ################################
    ################################
    #
    # Simulate and Return
    #
    ################################
    ################################
    
    ################################
    # Simulate and plot light curves
    ################################
    
    # create simulated light curve
    drw_sim_t, drw_sim_y, drw_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, best_drw_arma, 1, t)
    dho_sim_t, dho_sim_y, dho_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, theta_max_norm, 2, t)
    
    # directory to save plots to
    plot_dir = 'carma_plots'
    # plot drw
    plot = False  
    if plot:
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(drw_sim_t, drw_sim_y, label=f'drw {best_drw_arma}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_drw_fit.png')
        matplotlib.pyplot.close()

        # plot dho
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(dho_sim_t, dho_sim_y, label=f'dho {theta_max_norm}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_dho_fit.png')
        matplotlib.pyplot.close()
    
    ################################
    # Determine best fit
    ################################
    
    # get chi-squared from sim light curves
    chisq_drw = chisqg(y, drw_sim_y, yerr)
    chisq_dho = chisqg(y, dho_sim_y, yerr)
    
    # determine best fit
    best_fit = 'DRW'
    if chisq_drw > chisq_dho and not numpy.isinf(chisq_dho):
        best_fit = 'DHO'
    
    ################################
    # Return
    ################################
    
    return file_name, ra, dec, key[4], t, y_real, yerr_real, best_drw, best_drw_arma, chisq_drw, best_dho, theta_max_norm, theta_max_probability, chisq_dho, best_fit, lc_length

#### Initialization & Parallel Python

In [5]:
ppservers = ()

# creates jobserver with ncpus workers
ncpus = 24
job_server = pp.Server(ncpus, ppservers=ppservers)

print("Starting pp with", job_server.get_ncpus(), "workers")

# read-in FP df
file = 'BAT_AGN_ZTF_ForcedPhotometry_LightCurves_AllBands.parquet'
df = pandas.read_parquet(file)

# create list of keys based on a given filter
# filter_type = 'g'
keys = []
for index, row in df.iterrows():
    # check if the row matches the filter_type
    if row['filter'] == filter_type:
       
        # minimum allowed LC length
        min_length = 10
        
        # check if the LC meets the minimum length (excluding indices with outliers)
        good_ind = numpy.where((row['mag'] > 10) & (row['mag'] < 30))[0]
        t = row['JD'][good_ind]
        if len(t) >= min_length:
            keys.append([file, row['RA'], row['DEC'], filter_type, row['field'], min_length])

# raise runtime exception if no keys are generated
if len(keys) == 0:
    raise ValueError(f'No keys were found: keys = {keys}')
    
# intialize lists to save to
file_names = []
ras = []
decs =[]
fields = []
times = []
magnitudes = []
mag_errors = []
best_fit_drws = []
best_fit_drws_arma = []
best_fit_dhos = []
best_mcmc_dhos = []
dho_probabilities = []
chi_squared_drw = []
chi_squared_dho = []
best_fits = []
lc_lengths = []

# Submit a list of jobs running getCARMAstats for each file in repository
# getCARMAstats - the function
# (key,) - [filter_type, field] function parameter
# (chisqg, ...) - tuple with functions on which getCARMAstats depends
# ("numpy", ...) - tuple with package dependencies to be imported
jobs = [(key, job_server.submit(getCARMAstats ,(key,), 
                                 (get_carma_parameter, lnprior_perturb, lnprior_bounds, lnlike, lnprob, chisqg,), 
                                 ("numpy", "matplotlib.pyplot", "pandas", "emcee", "eztao", "eztao.ts",
                                  "celerite"))) for key in keys]

job_num = 1
for file, job in jobs:
    # start job
    file_name, ra, dec, field, t, y, yerr, best_drw, best_drw_arma, chisq_drw, best_dho, best_mcmc_dho, dho_probability, chisq_dho, best_fit, lc_length = job()
        
    # save data from job
    file_names.append(file_name)
    ras.append(ra)
    decs.append(dec)
    fields.append(field)
    times.append(t)
    magnitudes.append(y)
    mag_errors.append(yerr)
    best_fit_drws.append(best_drw)
    best_fit_drws_arma.append(best_drw_arma)
    chi_squared_drw.append(chisq_drw)
    best_fit_dhos.append(best_dho)
    best_mcmc_dhos.append(best_mcmc_dho)
    dho_probabilities.append(dho_probability)
    chi_squared_dho.append(chisq_dho)
    best_fits.append(best_fit)
    lc_lengths.append(lc_length)
    
    print(f'Completed [{job_num}/{len(jobs)}]: {file_name}')
    job_num += 1

job_server.print_stats()

Starting pp with 24 workers
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [1/544]: fp_lc_0.20323455_-7.1532089_field_1389.csv
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [2/544]: fp_lc_0.20323455_-7.1532089_field_395.csv
Completed [3/544]: fp_lc_0.8642925999999999_27.654793_field_1645.csv
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [4/544]: fp_lc_0.8642925999999999_27.654793_field_600.csv
 Completed [5/544]: fp_lc_1.0082763_70.3217215_field_1880.csv
Completed [6/544]: fp_lc_1.0082763_70.3217215_field_833.csv
 Completed [7/544]: fp_lc_1.0082763_70.3217215_field_853.csv
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [8/544]: fp_lc_1.58140115_20.20296745_field_1596.csv
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [9/544]: fp_lc_1.58140115_20.20296745_field_550.csv
  var = -np.sum(KxsT * self.apply_inverse(KxsT), axis=0)
 Completed [10/544]: fp_lc_10.75782695_30.2887766_field_6

#### Create and Save Dataframe with Results

In [6]:
agn_fit_data = pandas.DataFrame({'Filenames': file_names, 'RA': ras, 'DEC': decs, 'field': fields, 'Times (JD)': times, 
                                 'Magnitudes': magnitudes, 'Mag Errors': mag_errors, 
                                 'Best DRW Fit': best_fit_drws, 'Best DRW ARMA Fit': best_fit_drws_arma, 'DRW chisq': chi_squared_drw,
                                 'Best DHO Fit': best_fit_dhos, 'DHO MCMC Fit': best_mcmc_dhos, 'DHO MCMC Probability': dho_probabilities, 'DHO chisq': chi_squared_dho,
                                 'Best Fit': best_fits, 'LC Length': lc_lengths})

# save dataframe
agn_fit_data.to_parquet(f'AGN_FP_{filter_type}_CarmaFits.parquet')
agn_fit_data

Unnamed: 0,Filenames,RA,DEC,field,Times (JD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,DRW chisq,Best DHO Fit,DHO MCMC Fit,DHO MCMC Probability,DHO chisq,Best Fit,LC Length
0,fp_lc_0.20323455_-7.1532089_field_1389.csv,0.20323455,-7.1532089,1389,"[2458353.8754282, 2458362.8815394, 2458719.940...","[21.114627750693703, 21.289021923923134, 21.94...","[0.3386452047814808, 0.3950931108889877, 0.577...","[1.0890750261579591, 1.010050167084168]","[0.3992328749481987, 3.930495413751836]",4.387582e-03,"[0.0054662949943588435, 0.47167175182875204, 0...","[0.0036956914047161665, 331.91860189800474, 1....",-18.575297,1.446756,DRW,14
1,fp_lc_0.20323455_-7.1532089_field_395.csv,0.20323455,-7.1532089,395,"[2458274.9818287, 2458283.9800926, 2458290.980...","[20.92020347085257, 22.432021890399437, 21.481...","[0.5943087735637443, 1.8142867589951983, 0.688...","[1.010050167084168, 3.4151082310929746]","[0.3715581744238082, 122.19329622113878]",7.087514e-02,"[77.34610436052499, 3.059023205018258e-07, 0.0...","[78.21687680746702, 1.283897074590607e-06, 0.0...",-191.596852,38.348725,DRW,146
2,fp_lc_0.8642925999999999_27.654793_field_1645.csv,0.8642925999999999,27.654793,1645,"[2458326.8911343, 2458327.9079167, 2458334.884...","[21.348698132565097, 21.49658811297937, 20.925...","[0.6736513397863145, 0.8380042940550074, 0.280...","[1.010050167084168, 101.58972026029552]","[0.3715581744238082, 1.2125547571466234e+62]",5.824487e-24,"[4.595716822245285e-05, 10963.981981229173, 0....","[6.247974142483852e-07, 10963.983930564798, 2....",-30.823526,15.120620,DRW,30
3,fp_lc_0.8642925999999999_27.654793_field_600.csv,0.8642925999999999,27.654793,600,"[2458271.943206, 2458283.9677315, 2458289.9685...","[22.596542670127025, 21.95674181070741, 22.465...","[2.039169973894654, 0.5683894481210139, 0.7542...","[1.010050167084168, 6.8477853267893725]","[0.3715581744238082, 15304.934601290344]",2.943597e-07,"[98.46442496663634, 0.3128070211054629, 3.9911...","[98.77896694416344, 0.33640931214029435, 4.067...",-539.373723,131.265177,DRW,434
4,fp_lc_1.0082763_70.3217215_field_1880.csv,1.0082763,70.3217215,1880,"[2458332.8937384, 2458355.8120833, 2458357.834...","[21.763203502015056, 22.469045959012988, 22.68...","[1.1931352029130742, 1.389484154205049, 1.8009...","[1.010050167084168, 22026.465794806718]","[0.3715581744238082, inf]",0.000000e+00,"[3.059023205018258e-07, 14.797721210284303, 0....","[2.956453113228859e-06, 2648.026485965237, 0.0...",-40.488644,1.812734,DRW,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539,fp_lc_90.658166_65.3713197_field_1845.csv,90.658166,65.3713197,1845,"[2458429.8649884, 2458432.8586458, 2458435.870...","[19.51079332398352, 19.92874020917677, 22.7776...","[0.0649831366493067, 0.1276850558972109, 3.108...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",5.420336e+01,"[1164.1393452699144, 0.017351898950228012, 0.1...","[1819.6514207344092, 110.34813122509556, 169.0...",-56.574327,3.820059,DHO,44
540,fp_lc_90.658166_65.3713197_field_812.csv,90.658166,65.3713197,812,"[2458244.6648264, 2458323.9908102, 2458329.992...","[20.55106329429142, 20.083038610175493, 21.253...","[0.3859598015347237, 0.2101551264309361, 0.652...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",1.123338e+01,"[244.55200542362505, 1.797278903027526, 10.369...","[220.98919594532347, 1.7256137967433567, 10.05...",-206.377019,20.053248,DRW,168
541,fp_lc_93.9014798_71.03749785_field_838.csv,93.9014798,71.03749785,838,"[2458221.6981481, 2458249.6584954, 2458337.987...","[19.236577335792735, 17.29988814022734, 18.877...","[0.2172951448825648, 0.056031439392939, 0.0508...","[3.268897584545039, 1.010050167084168]","[0.736450297401709, 2.2035174544736105]",4.307139e+02,"[939.2309569230413, 2.3778076357522977, 0.0041...","[1.121242523375498, 1152842.8191032747, 0.0129...",-251.017137,34.089983,DHO,223
542,fp_lc_98.19657765_63.6736939_field_1845.csv,98.19657765,63.6736939,1845,"[2458429.8649884, 2458431.8742245, 2458432.858...","[19.08237525461287, 19.19463588394012, 19.4966...","[0.0451967837835527, 0.1375214022637934, 0.102...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",7.749060e+00,"[2805.006750055363, 4474.109170132985, 5.33959...","[0.1733226504973519, 2797573.395435756, 13.631...",-26.080163,2.241446,DHO,23


### 2.2 Best Fit Validation

Iterates through AGN Carma Fits and validates if the best if *is* indeed the best fit based on the parameters:

- If DRW, then drw_chisq != 0
- If DHO, then dho_chisq != inf

If both fits are bad by this definition, then the best fit is replaced with 'None'

In [8]:
# modify datarame to account for bad fits
# if best chi-sq has a bad fit, then check the other chi-sq
# if both fits are bad, then assign chi-sq to 'None'

import numpy as np
import pandas as pd

agn_fit_data = pd.read_parquet(f'AGN_FP_{filter_type}_CarmaFits.parquet')

# iterate through each AGN
adjusted_best_fits = []
for index, row in agn_fit_data.iterrows():
    best_fit = row['Best Fit']
    drw_chisq = row['DRW chisq']
    dho_chisq = row['DHO chisq']
    
    
    # if Best Fit is DRW and is bad...
    if best_fit == 'DRW' and drw_chisq == 0:
        
        if not np.isinf(dho_chisq):
            adjusted_best_fits.append('DHO')
        else:
            adjusted_best_fits.append('None')
            
    # if Best Fit is DHO and is bad...
    elif best_fit == 'DRW' and np.isinf(dho_chisq):
        if not drw_chisq == 0:
            adjusted_best_fits.append('DRW')
        else:
            adjusted_best_fits.append('None')
    
    # if Best Fit is good, then save it
    else:
        adjusted_best_fits.append(best_fit)
        
# replace adjusted values
agn_fit_data['Best Fit'] = adjusted_best_fits

# removed unamed colemns
agn_fit_data = agn_fit_data.loc[:, ~agn_fit_data.columns.str.contains('^Unnamed')]

# save dataframe
agn_fit_data.to_parquet(f'AGN_FP_{filter_type}_CarmaFits.parquet')

In [9]:
agn_fit_data = pd.read_parquet(f'AGN_FP_{filter_type}_CarmaFits.parquet')
agn_fit_data

Unnamed: 0,Filenames,RA,DEC,field,Times (JD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,DRW chisq,Best DHO Fit,DHO MCMC Fit,DHO MCMC Probability,DHO chisq,Best Fit,LC Length
0,fp_lc_0.20323455_-7.1532089_field_1389.csv,0.20323455,-7.1532089,1389,"[2458353.8754282, 2458362.8815394, 2458719.940...","[21.114627750693703, 21.289021923923134, 21.94...","[0.3386452047814808, 0.3950931108889877, 0.577...","[1.0890750261579591, 1.010050167084168]","[0.3992328749481987, 3.930495413751836]",4.387582e-03,"[0.0054662949943588435, 0.47167175182875204, 0...","[0.0036956914047161665, 331.91860189800474, 1....",-18.575297,1.446756,DRW,14
1,fp_lc_0.20323455_-7.1532089_field_395.csv,0.20323455,-7.1532089,395,"[2458274.9818287, 2458283.9800926, 2458290.980...","[20.92020347085257, 22.432021890399437, 21.481...","[0.5943087735637443, 1.8142867589951983, 0.688...","[1.010050167084168, 3.4151082310929746]","[0.3715581744238082, 122.19329622113878]",7.087514e-02,"[77.34610436052499, 3.059023205018258e-07, 0.0...","[78.21687680746702, 1.283897074590607e-06, 0.0...",-191.596852,38.348725,DRW,146
2,fp_lc_0.8642925999999999_27.654793_field_1645.csv,0.8642925999999999,27.654793,1645,"[2458326.8911343, 2458327.9079167, 2458334.884...","[21.348698132565097, 21.49658811297937, 20.925...","[0.6736513397863145, 0.8380042940550074, 0.280...","[1.010050167084168, 101.58972026029552]","[0.3715581744238082, 1.2125547571466234e+62]",5.824487e-24,"[4.595716822245285e-05, 10963.981981229173, 0....","[6.247974142483852e-07, 10963.983930564798, 2....",-30.823526,15.120620,DRW,30
3,fp_lc_0.8642925999999999_27.654793_field_600.csv,0.8642925999999999,27.654793,600,"[2458271.943206, 2458283.9677315, 2458289.9685...","[22.596542670127025, 21.95674181070741, 22.465...","[2.039169973894654, 0.5683894481210139, 0.7542...","[1.010050167084168, 6.8477853267893725]","[0.3715581744238082, 15304.934601290344]",2.943597e-07,"[98.46442496663634, 0.3128070211054629, 3.9911...","[98.77896694416344, 0.33640931214029435, 4.067...",-539.373723,131.265177,DRW,434
4,fp_lc_1.0082763_70.3217215_field_1880.csv,1.0082763,70.3217215,1880,"[2458332.8937384, 2458355.8120833, 2458357.834...","[21.763203502015056, 22.469045959012988, 22.68...","[1.1931352029130742, 1.389484154205049, 1.8009...","[1.010050167084168, 22026.465794806718]","[0.3715581744238082, inf]",0.000000e+00,"[3.059023205018258e-07, 14.797721210284303, 0....","[2.956453113228859e-06, 2648.026485965237, 0.0...",-40.488644,1.812734,DHO,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539,fp_lc_90.658166_65.3713197_field_1845.csv,90.658166,65.3713197,1845,"[2458429.8649884, 2458432.8586458, 2458435.870...","[19.51079332398352, 19.92874020917677, 22.7776...","[0.0649831366493067, 0.1276850558972109, 3.108...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",5.420336e+01,"[1164.1393452699144, 0.017351898950228012, 0.1...","[1819.6514207344092, 110.34813122509556, 169.0...",-56.574327,3.820059,DHO,44
540,fp_lc_90.658166_65.3713197_field_812.csv,90.658166,65.3713197,812,"[2458244.6648264, 2458323.9908102, 2458329.992...","[20.55106329429142, 20.083038610175493, 21.253...","[0.3859598015347237, 0.2101551264309361, 0.652...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",1.123338e+01,"[244.55200542362505, 1.797278903027526, 10.369...","[220.98919594532347, 1.7256137967433567, 10.05...",-206.377019,20.053248,DRW,168
541,fp_lc_93.9014798_71.03749785_field_838.csv,93.9014798,71.03749785,838,"[2458221.6981481, 2458249.6584954, 2458337.987...","[19.236577335792735, 17.29988814022734, 18.877...","[0.2172951448825648, 0.056031439392939, 0.0508...","[3.268897584545039, 1.010050167084168]","[0.736450297401709, 2.2035174544736105]",4.307139e+02,"[939.2309569230413, 2.3778076357522977, 0.0041...","[1.121242523375498, 1152842.8191032747, 0.0129...",-251.017137,34.089983,DHO,223
542,fp_lc_98.19657765_63.6736939_field_1845.csv,98.19657765,63.6736939,1845,"[2458429.8649884, 2458431.8742245, 2458432.858...","[19.08237525461287, 19.19463588394012, 19.4966...","[0.0451967837835527, 0.1375214022637934, 0.102...","[1.010050167084168, 1.010050167084168]","[0.3715581744238082, 4.142511876711821]",7.749060e+00,"[2805.006750055363, 4474.109170132985, 5.33959...","[0.1733226504973519, 2797573.395435756, 13.631...",-26.080163,2.241446,DHO,23


- - -
- - -

## Chapter 3: Data Migration

This chapter merges dataframes containing other pertinent data (i.e. AGN properties) into one master dataframe

### 3.1 Merge BASS AGN Properties

Merges a dataframe containing pertinent AGN properties provided by the BAT AGN Spectroscopic Survey

In [4]:
import numpy as np
import pandas as pd

# read carma dataframe
agn_fit_data = pd.read_parquet(f'AGN_FP_{filter_type}_CarmaFits.parquet')

# read in properties dataframe
agn_properties = pd.read_csv("../BAT_AGN_BASS_Data.csv")

################################
# Round RA's and DEC's of dataframes
################################

decimal_places = 1

# round fit RA and DEC
agn_fit_data = round_colemn(agn_fit_data, 'RA', decimal_places)
agn_fit_data = round_colemn(agn_fit_data, 'DEC', decimal_places)

# round properties RA and DEC
agn_properties = round_colemn(agn_properties, 'RA', decimal_places)
agn_properties = round_colemn(agn_properties, 'DEC', decimal_places)

################################
# Merge CARMA and BASS dataframes
################################

# merge dataframes
agn_dataframe = pd.merge(agn_fit_data, agn_properties, how='left', on=['RA', 'DEC'], validate='many_to_one')

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save as master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

### 3.2 Class and Type

Merges dataframe containing the class and type of AGN provided by BAT

In [5]:
import numpy as np
import pandas as pd

################################
# Prepare BAT AGN dataframe for merge
################################

catalog = pd.read_csv('../BAT_70m_catalog_20nov2012.txt', sep='|')

# removed unamed colemns
catalog = catalog.loc[:, ~catalog.columns.str.contains('^Unnamed')]

# round fit RA and DEC
decimal_places = 1
catalog = round_colemn(catalog, 'CTPT_RA', decimal_places)
catalog = round_colemn(catalog, 'CTPT_DEC', decimal_places)

# create new dataframe with RAs, Decs, and desired data
catalog = pd.DataFrame({'RA': catalog['CTPT_RA'], 'DEC': catalog['CTPT_DEC'],
                       'CL2': catalog['CL2'], 'TYPE': catalog['TYPE']})

################################
# Merge Master and BAT dataframes
################################

# read master dataframe
agn_dataframe = pd.read_parquet("AGN_FP_g_MasterDF.parquet")

# merge dataframes
agn_dataframe = pd.merge(agn_dataframe, catalog, how='left', on=['RA', 'DEC'], validate='many_to_one')

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save to master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

### 3.3 SNR
This section calculates the SNR for each AGN

In [6]:
import numpy as np
import pandas as pd

# read master dataframe
agn_dataframe = pd.read_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

# calculate snr for each row in the data frame
snr = []

# index each AGN in agn_fit_data and calculate SNR
for index, row in agn_dataframe.iterrows():
    # get data from row
    y = row['Magnitudes']
    err = row['Mag Errors']
    
    # remove NaN's from dataset
    good_ind = np.where((y > 10) & (y < 30))[0]
    y = y[good_ind]
    err = err[good_ind]
    
    # calculate SNR for row
    mean_y = np.mean(y)
    mean_err = np.mean(err)
    snr_row = mean_y / mean_err
    
    # save data
    snr.append(snr_row)

# add snr
agn_dataframe['SNR'] = snr

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save to master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

 - - -
 - - -

## Chapter 4: DHO Timescales

This chapter focuses on calculating missing DHO timescale infomration and exploring properties of that data

### 4.1 Calculate Timescales
Calcluates DHO timescales and adds data to master dataframe. **See Apendix A for dho_timescales()**

In [7]:
import numpy as np
import pandas as pd

# read master dataframe
agn_dataframe = pd.read_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

xis = []
tau_decays = []
tau_rise_dqpos = []
tau_perturbs = []
tau_decorrs = []
omega_0s = []

for index, row in agn_dataframe.iterrows():
    # read dho parameters
    dho_params = row['DHO MCMC Fit']
    
    # calculate timescale
    timescales = dho_timescales(dho_params)
    
    # save time scale data of current row
    xis.append(timescales[0])
    tau_decays.append(timescales[1])
    tau_rise_dqpos.append(timescales[2])
    tau_perturbs.append(timescales[3])
    tau_decorrs.append(timescales[4])
    omega_0s.append(timescales[5])
    
# create new dataframe of timescale data
timescales_df = pd.DataFrame({'xi': xis, 
                              'tau_decay': tau_decays, 'tau_rise_dqpo': tau_rise_dqpos, 'tau_perturb': tau_perturbs, 'tau_decorr': tau_decorrs, 
                              'omega_0': omega_0s})

# merge timescale dataframe with master dataframe
agn_dataframe = agn_dataframe.join(timescales_df)

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save to master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

ValueError: columns overlap but no suffix specified: Index(['xi', 'tau_decay', 'tau_rise_dqpo', 'tau_perturb', 'tau_decorr',
       'omega_0'],
      dtype='object')

### 4.2 Timescale Damping

Determine Damping of DHO Timescale based on xi

In [4]:
import numpy as np
import pandas as pd

# read master dataframe
agn_dataframe = pd.read_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

# determined damping
xis = agn_dataframe['xi']
oscillations = []

for xi in xis:
    if xi > 1:
        oscillations.append('overdamped')
    elif xi < 1:
        oscillations.append('underdamped')
    else:
        oscillations.append('critically damped')

agn_dataframe['oscillation'] = oscillations

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save to master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

### 4.3 "Bad Fits" Based on Timescales

- timescales that are smaller than half the minimum distance between two points in the light curve (e.g., min(delta_t)) or larger than twice the length of the light curve.

- timescales to reference: tau_decorr, tau_decay, and tau_rise

In [5]:
import numpy as np
import pandas as pd

# read master dataframe
agn_dataframe = pd.read_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

################################
# Create empty lists to save values to
################################

good_timescale_fits = []

decorr_fits = []
decay_fits = []
rise_fits = []

################################
# Iterate light curves
################################

# for each light curve
for index, row in agn_dataframe.iterrows():
    ################################ 
    # Read in time
    ################################
    t = row['Times (JD)']
    
    assert not np.isinf(t[0]), f'Cannot have time of infinity! (index: {index})'
        
    ################################ 
    # Get half of the min time interval
    ################################
    
    # initialize min_t as the 32 bit integer limit
    min_interval = np.iinfo(np.int32).max
   
    # find min time interval
    for i in range(len(t)-2):
        curr_interval = t[i+1] - t[i]
        min_interval = min_interval if min_interval < curr_interval else curr_interval
        
    # save 1/2 * min(delta_t)
    min_t = 0.5 * min_interval
    
    ################################ 
    # Get twice the timeframe
    ################################
    
    timeframe = t[len(t)-1] - t[0]
    max_t = timeframe * 2 
    
    ################################
    # Compare time with timescales
    ################################
    
    tau_decay = row['tau_decay']
    tau_rise = row['tau_rise_dqpo']
    tau_decorr = row['tau_decorr']
    
    # Compare and assign value based on result
        # ontime = good
        # undertime = < min
        # overtime = > max
    def timescale_fit(timescale, max_t, min_t):
        if timescale > max_t:
            fit = 'overtime'
        elif timescale < min_t: 
            fit= 'undertime'
        else:
            fit = 'ontime'
        return fit
    
    decay_fit = timescale_fit(tau_decay, max_t, min_t)
    rise_fit = timescale_fit(tau_rise, max_t, min_t)
    decorr_fit = timescale_fit(tau_decorr, max_t, min_t)
    
    # Save as single boolean
        # TRUE = good fit
        # FALSE = bad fit
    good_timescale_fit = 'ontime' == decay_fit == rise_fit == decorr_fit
        
    # append to lists
    good_timescale_fits.append(good_timescale_fit)
    decay_fits.append(decay_fit)
    rise_fits.append(rise_fit)
    decorr_fits.append(decorr_fit)

################################
# Update datframe
################################

# add to dataframe in four colmens
agn_dataframe['good timescale fit'] = good_timescale_fits
agn_dataframe['tau_decay fit'] = decay_fits
agn_dataframe['tau_rise_dqpo fit'] = rise_fits
agn_dataframe['tau_decorr fit'] = decorr_fits

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save to master dataframe
agn_dataframe.to_parquet(f'AGN_FP_{filter_type}_MasterDF.parquet')

  min_interval = (curr_interval, min_interval)[min_interval < curr_interval]


- - -
- - -

## Appendix A: Functions

Miscillaneous Functions that are useful or used multiple times throughout this workbook

#### round_colemn()

In [2]:
# rounds all values in a given colemn of a Pandas df to a decimal place
def round_colemn(df, key, decimals=3):    
    df[key] = np.around([float(i) for i in df[key].tolist()], decimals=decimals)
    
    # note: return statement is redundant since Python is pass-by-reference
    # but return is generally good practice
    return df

#### dho_timescales()

In [3]:
def dho_timescales(params):
    """Compute a couple DHO timescales from CARMA parameters (in normal space).

    - damping factor
    - decay timescale
    - rise/damped QPO timescale
    - perturbation timescale
    - decorrelation timescale
    - natural oscillation frequency
    """
   
    # expand params
    a1, a2, b0, b1 = params  

    # damping factor & natural frequency
    xi = a1/(2*np.sqrt(a2))
    omega_0 = np.sqrt(a2)   

    # placeholder for two timescales
    tau_perturb = b1/b0
    tau_decay = 0
    tau_rise_dqpo = 0
    tau_decorr = 0

    roots = np.roots([1, a1, a2])
    if xi < 1:
        tau_decay = np.abs(1/roots[0].real)
        tau_rise_dqpo = 2*np.pi*np.abs(1/roots[0].imag)/np.sqrt(1 - xi**2)
        tau_decorr = (np.pi/2)*np.pi*2/omega_0
    else:
        tau_decay = np.abs(1/np.max(roots.real))
        tau_rise_dqpo = np.abs(1/np.min(roots.real))
        tau_decorr = (tau_decay + tau_rise_dqpo)*np.pi/2
 
    return np.array([xi, tau_decay, tau_rise_dqpo, tau_perturb, tau_decorr, omega_0])