### Parallel Python

In [1]:
import numpy
import matplotlib.pyplot
import pandas
import glob
import emcee

import eztao
import eztao.ts

import celerite

import pp

In [2]:
################################
# Define CARMA function for DRW
################################

def get_carma_parameter(tau, amp):
    """Get DRW parameters in CARMA notation (alpha_*/beta_*).

    alpha_1 = -1 / tau
    sigma^2 = tau * sigma_kbs^2 / 2
    sigma_kbs = np.sqrt( 2 * sigma^2 / tau )
    beta_0 = sigma_kbs

    Returns:
        [alpha_1, beta_0].
    """
    return [-1.0 / tau, numpy.sqrt( 2.0 * amp**2.0 / tau)]

################################
# Define the prior and log-probability functions for MCMC
################################

# prior function for tau_perturb
def lnprior_perturb(theta):
    """Prior on perturbation timescale. Note: this is a wedge like prior."""

    # determine DHO timescales
    log10_tau_perturb = (theta[-1] - theta[-2])/numpy.log(10)
    if -3 <= log10_tau_perturb <= 5:
        prior = 0
    else:
        prior = -(numpy.abs(log10_tau_perturb - 1) - 4)

    return prior

def lnprior_bounds(theta):
    """Prior on AR and MA parameters. This is a flat prior."""

    # Place some bounds on the parameter space
    bounds_low = numpy.array([-15, -15, -20, -20])
    bounds_high = numpy.array([15, 15, 10, 10])

    log_a1, log_a2, log_b0, log_b1 = theta
    if ( 
        bounds_low[0] < log_a1 < bounds_high[0] 
        and bounds_low[1] < log_a2 < bounds_high[1] 
        and bounds_low[2] < log_b0 < bounds_high[2] 
        and bounds_low[3] < log_b1 < bounds_high[3] 
       ):
        return 0.0
    return -numpy.inf

# We'll use the eztao version which effectively returns "gp.log_likelihood" from the GP and np.inf otherwise
def lnlike(theta, y, gp):
    return -eztao.ts.neg_param_ll(theta, y, gp)

def lnprob(theta, y, gp):
    lp_bounds = lnprior_bounds(theta)
    lp_perturb = lnprior_perturb(theta)                              
    if not numpy.isfinite(lp_bounds):
        return -numpy.inf
    return lp_bounds + lp_perturb + lnlike(theta, y, gp)

################################
# Define other functions
################################

# chi-sqared
def chisqg(y_data, y_model, sd=None):
    chisq = numpy.nansum(((y_data-y_model)/sd)**2)
    return chisq

In [13]:
def getCARMAstats(file):
    ################################
    # setup
    ################################

    #file_name = file[22:-8]
    file_name = file[22:-4]

    
    # read-in light curve
    df = pandas.read_csv(file)

    # obtain values from df
    ra = 0.8642750 #df['ra'].values[0]
    dec = 27.6548121 #df['dec'].values[0]
    #t = df['mjd'].values
    #y_real = df['mag'].values
    #yerr_real = df['magerr'].values
    t = df['jd'].values
    y_real = df['Magnitude (abs)'].values
    yerr_real = df['Magnitude Error (abs)'].values
    lc_length = len(t)
    
    # invert the magnitudes
    y_real_inverted = (min(y_real)-y_real)

    # normalize to unit standard deviation and zero mean
    y = (y_real_inverted - numpy.mean(y_real_inverted))/numpy.std(y_real_inverted)
    yerr = yerr_real/numpy.std(y_real_inverted)
        
    
    ################################
    ################################
    #
    # DRW Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(0.01, 10.0), (0.01, 10.0)]
    best_drw = eztao.ts.drw_fit(t, y, yerr, user_bounds=bounds)
    
    # get best-fit in CARMA space
    best_drw_arma = numpy.exp(get_carma_parameter(best_drw[0], best_drw[1]))
    
    
    ################################
    ################################
    #
    # DHO Process
    #
    ################################
    ################################
    
    # obtain best-fit
    bounds = [(-15, 15), (-15, 15), (-20, 10), (-20, 10)]
    best_dho = eztao.ts.dho_fit(t, y, yerr, user_bounds=bounds)

    # Create the GP model -- instead of creating a "model" function that is then called by the "lnlike" function from tutorial,
    #  we will create a GP that will be passed as an argument to the MCMC sampler. This will be the "gp" that is passed to
    #  the "lnprob" and "param_ll" functions
    dho_kernel = eztao.carma.DHO_term(*numpy.log(best_dho))
    dho_gp = celerite.GP(dho_kernel, mean=numpy.median(y))
    dho_gp.compute(t, yerr)

    ################################
    # MCMC
    ################################

    # Initalize MCMC
    data = (t, y, yerr)
    nwalkers = 128
    niter = 2048

    initial = numpy.array(numpy.log(best_dho))
    ndim = len(initial)
    p0 = [numpy.array(initial) + 1e-7 * numpy.random.randn(ndim) for i in range(nwalkers)]

    # Create the MCMC sampler -- note that the GP is passed as an argument in addition to the data
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=[y, dho_gp])

    # run a burn-in surrounding the best-fit parameters obtained above
    p0, lp, _ = sampler.run_mcmc(p0, 200)
    sampler.reset()

    # clear up the stored chain from burn-in, rerun the MCMC
    pos, prob, state = sampler.run_mcmc(p0, niter);

    ################################
    # Obtain the Best Fit: theta_max
    ################################

    # put all the samples that explored in MCMC into a single array
    samples = sampler.flatchain
    
    # find the parameters that have the best fit 
    theta_max_index = numpy.argmax(sampler.flatlnprobability)
    theta_max_probability = sampler.flatlnprobability[theta_max_index]
   
    theta_max  = samples[theta_max_index] # these are in log-space
    theta_max_norm = numpy.exp(theta_max) # take the exponent to get into 'normal' space
    
    
    ################################
    ################################
    #
    # Simulate and Return
    #
    ################################
    ################################
    
    ################################
    # Simulate and plot light curves
    ################################
    
    # create simulated light curve
    drw_sim_t, drw_sim_y, drw_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, best_drw_arma, 1, t)
    dho_sim_t, dho_sim_y, dho_sim_yerr = eztao.ts.carma_sim.pred_lc(t, y, yerr, theta_max_norm, 2, t)
    
    # directory to save plots to
    plot_dir = 'plots_and_figures/carma_plots'
    # plot drw
    plot = True  
    if plot:
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(drw_sim_t, drw_sim_y, label=f'drw {best_drw_arma}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_drw_fit.png')
        matplotlib.pyplot.close()

        # plot dho
        matplotlib.pyplot.figure()
        matplotlib.pyplot.errorbar(t, y, yerr=yerr, label='data',
                                   linestyle="None", marker='.', ms=3., color='purple', ecolor='0.8')
        matplotlib.pyplot.plot(dho_sim_t, dho_sim_y, label=f'dho {theta_max_norm}')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.savefig(f'{plot_dir}/{file_name}_dho_fit.png')
        matplotlib.pyplot.close()
    
    ################################
    # Determine best fit
    ################################
    
    # get chi-squared from sim light curves
    chisq_drw = chisqg(y, drw_sim_y, yerr)
    chisq_dho = chisqg(y, dho_sim_y, yerr)
    
    # determine best fit
    best_fit = 'DRW'
    if chisq_drw > chisq_dho and not numpy.isinf(chisq_dho):
        best_fit = 'DHO'
    
    ################################
    # Return
    ################################
    
    return file_name, ra, dec, t, y_real, yerr_real, best_drw, best_drw_arma, chisq_drw, best_dho, theta_max_norm, theta_max_probability, chisq_dho, best_fit, lc_length

In [11]:
ppservers = ()

# creates jobserver with ncpus workers
ncpus = 24
job_server = pp.Server(ncpus, ppservers=ppservers)

print("Starting pp with", job_server.get_ncpus(), "workers")

# get list of data files
#repository = glob.glob('data/*.csv')
#repository = glob.glob('../../AGN_LightCurves/*.parquet')
repository = glob.glob('../../FPh_LightCurves/*.csv')

# intialize lists to save to
file_names = []
times = []
magnitudes = []
mag_errors = []
ras = []
decs =[]
best_fit_drws = []
best_fit_drws_arma = []
best_fit_dhos = []
best_mcmc_dhos = []
dho_probabilities = []
chi_squared_drw = []
chi_squared_dho = []
best_fits = []
lc_lengths = []

# Submit a list of jobs running getCARMAstats for each file in repository
# getCARMAstats - the function
# (file,) - file with AGN lc
# (chisqg, ...) - tuple with functions on which getCARMAstats depends
# ("numpy", ...) - tuple with package dependencies to be imported
jobs = [(file, job_server.submit(getCARMAstats ,(file,), 
                                 (get_carma_parameter, lnprior_perturb, lnprior_bounds, lnlike, lnprob, chisqg,), 
                                 ("numpy", "matplotlib.pyplot", "pandas", "emcee", "eztao", "eztao.ts",
                                  "celerite"))) for file in repository]

job_num = 1
for file, job in jobs:
    # start job
    file_name, ra, dec, t, y, yerr, best_drw, best_drw_arma, chisq_drw, best_dho, best_mcmc_dho, dho_probability, chisq_dho, best_fit, lc_length = job()
        
    # save data from job
    file_names.append(file_name)
    ras.append(ra)
    decs.append(dec)
    times.append(t)
    magnitudes.append(y)
    mag_errors.append(yerr)
    best_fit_drws.append(best_drw)
    best_fit_drws_arma.append(best_drw_arma)
    chi_squared_drw.append(chisq_drw)
    best_fit_dhos.append(best_dho)
    best_mcmc_dhos.append(best_mcmc_dho)
    dho_probabilities.append(dho_probability)
    chi_squared_dho.append(chisq_dho)
    best_fits.append(best_fit)
    lc_lengths.append(lc_length)
    
    #print(f'Completed [{job_num}/{len(jobs)}]: {file_name}')
    job_num += 1

job_server.print_stats()

Starting pp with 24 workers
 Job execution statistics:
 job count | % of all jobs | job time sum | time per job | job server
         1 |        100.00 |       9.5670 |     9.567021 | local
Time elapsed since server creation 9.57351803779602
0 active tasks, 24 cores



In [12]:
fph_fit_data = pandas.DataFrame({'Filenames': file_names, 'RA': ras, 'DEC': decs, 'Times (MJD)': times, 
                                 'Magnitudes': magnitudes, 'Mag Errors': mag_errors, 
                                 'Best DRW Fit': best_fit_drws, 'Best DRW ARMA Fit': best_fit_drws_arma, 'DRW chisq': chi_squared_drw,
                                 'Best DHO Fit': best_fit_dhos, 'DHO MCMC Fit': best_mcmc_dhos, 'DHO MCMC Probability': dho_probabilities, 'DHO chisq': chi_squared_dho,
                                 'Best Fit': best_fits, 'LC Length': lc_lengths})

# save dataframe
#agn_fit_data.to_csv('agn_fit_data.csv')
fph_fit_data.to_csv('fph_fit_data.csv')
#agn_fit_data
fph_fit_data

Unnamed: 0,Filenames,RA,DEC,Times (MJD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,DRW chisq,Best DHO Fit,DHO MCMC Fit,DHO MCMC Probability,DHO chisq,Best Fit,LC Length
0,forced_photometry_field600_RA_0.8642750_DEC_27...,0.864275,27.654812,"[2458271.9756597, 2458274.9529861, 2458277.946...","[18.106660110135675, 17.987523752735257, 17.96...","[0.1004438952663152, 0.0795561925400699, 0.076...","[1.010050167084168, 22026.465794806718]","[0.3715581744238082, inf]",0.0,"[3269017.3724721107, 0.009367783625251286, 1.1...","[3269017.3095584773, 0.00936778286760242, 1.16...",-inf,2.50765,DRW,467


### Combine Dataframes

In [2]:
import numpy as np
import pandas as pd

In [2]:
# read data frame
agn_fit_data = pd.read_csv("agn_fit_data.csv")

# read in properties data frame
agn_properties = pd.read_csv("BAT_AGN_BASS_Data.csv")

In [2]:
def round_colemn(df, key, decimals=3):
    df[key] = np.around(df[key].tolist(), decimals=decimals)
    
    # note: return statement is redundant since Python is pass-by-reference
    # but return is generally good practice
    return df

In [4]:
decimal_places = 1

# round fit RA and DEC
agn_fit_data = round_colemn(agn_fit_data, 'RA', decimal_places)
agn_fit_data = round_colemn(agn_fit_data, 'DEC', decimal_places)

# round properties RA and DEC
agn_properties = round_colemn(agn_properties, 'RA', decimal_places)
agn_properties = round_colemn(agn_properties, 'DEC', decimal_places)

In [6]:
# merge dataframes
agn_dataframe = pd.merge(agn_fit_data, agn_properties, how='left', on=['RA', 'DEC'], validate='one_to_one')

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save dataframe
agn_dataframe.to_csv('agn_dataframe.csv')

### Bad Fits
Exclude fits for DRW if chi-sq = 0 and DHO if chi-sq = inf 

In [1]:
import numpy as np
import pandas as pd

In [2]:
agn_dataframe = pd.read_csv("agn_dataframe.csv")

In [3]:
# modify datarame to account for bad fits
# if best chi-sq has a bad fit, then check the other chi-sq
# if both fits are bad, then assign chi-sq to 'None'

adjusted_best_fits = []

for index, row in agn_dataframe.iterrows():
    best_fit = row['Best Fit']
    drw_chisq = row['DRW_chi_sq']
    dho_chisq = row['DHO_chi_sq']
    
    # if Best Fit is DRW and is bad...
    if best_fit == 'DRW' and drw_chisq == 0:
        if not np.isinf(dho_chisq):
            adjusted_best_fits.append('DHO')
        else:
            adjusted_best_fits.append('None')
            
    # if Best Fit is DHO and is bad...
    elif best_fit == 'DRW' and np.isinf(dho_chisq):
        if not drw_chisq == 0:
            adjusted_best_fits.append('DRW')
        else:
            adjusted_best_fits.append('None')
    
    # if Best Fit is good, then save it
    else:
        adjusted_best_fits.append(best_fit)
        
# replace adjusted values
#agn_dataframe['Best Fit'] = agn_dataframe['Best Fit'].replace(adjusted_best_fits)
agn_dataframe['Best Fit'] = adjusted_best_fits

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save dataframe
agn_dataframe.to_csv('adjusted_agn_dataframe.csv')

### Calculate SNR

In [1]:
import numpy as np
import pandas as pd

In [2]:
agn_dataframe = pd.read_csv("adjusted_agn_dataframe.csv")

In [7]:
# calculate snr for each row in the data frame
snr = []

# index each AGN in agn_fit_data and calculate SNR
for index, row in agn_dataframe.iterrows():
    # get data from row
    y = row['Magnitudes']
    err = row['Mag Errors']
    
    # convert into float lists
    y = convert_to_float_list(y)
    err = convert_to_float_list(err)
    
    # mean of mag and err
    if y != flag:
        mean_y = np.mean(y)
        mean_err = np.mean(err)
        snr_row = mean_y / mean_err
    else:
        snr_row = np.NAN
    
    # save data
    snr.append(snr_row)

# add snr
agn_dataframe['SNR'] = snr

# save to new .csv file
agn_dataframe.to_csv('adjusted_agn_dataframe.csv')

### Remove any unamed colemns

In [5]:
filename = 'adjusted_agn_dataframe.csv'
agn_dataframe = pd.read_csv(filename)

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save dataframe
agn_dataframe.to_csv(filename)

### Timescales

In [1]:
import numpy as np
import pandas as pd

In [2]:
agn_dataframe = pd.read_csv('adjusted_agn_dataframe.csv')

In [3]:
def dho_timescales(params):
    """Compute a couple DHO timescales from CARMA parameters (in normal space).

    - damping factor
    - decay timescale
    - rise/damped QPO timescale
    - perturbation timescale
    - decorrelation timescale
    - natural oscillation frequency
    """
   
    # expand params
    a1, a2, b0, b1 = params  

    # damping factor & natural frequency
    xi = a1/(2*np.sqrt(a2))
    omega_0 = np.sqrt(a2)   

    # placeholder for two timescales
    tau_perturb = b1/b0
    tau_decay = 0
    tau_rise_dqpo = 0
    tau_decorr = 0

    roots = np.roots([1, a1, a2])
    if xi < 1:
        tau_decay = np.abs(1/roots[0].real)
        tau_rise_dqpo = 2*np.pi*np.abs(1/roots[0].imag)/np.sqrt(1 - xi**2)
        tau_decorr = (np.pi/2)*np.pi*2/omega_0
    else:
        tau_decay = np.abs(1/np.max(roots.real))
        tau_rise_dqpo = np.abs(1/np.min(roots.real))
        tau_decorr = (tau_decay + tau_rise_dqpo)*np.pi/2
 
    return np.array([xi, tau_decay, tau_rise_dqpo, tau_perturb, tau_decorr, omega_0])

In [6]:
flag = [float('inf')]

# Converts a string into a float list.
# String must be in the following format:
# '[1.01, 2.02, 3.03, 4.04]'
# Returns an array of len=1 with the one index='inf' if the String
# cannont be converted into a list of floats.
# Takes an optional string char for the characters to split by
def convert_to_float_list(str_list, **kwargs):
    char = kwargs.get('char', None)
    debug = kwargs.get('debug', False)
    
    if debug:
        print(str_list + ' foo')
        
    str_list = str_list.replace('[', '')
    str_list = str_list.replace(']', '')
    
    if debug:
        print(str_list + ' bar')
    
    try:
        return [float(i) for i in str_list.split(char)]
    except ValueError:
        return flag

In [21]:
bat_ids = []
xis = []
tau_decays = []
tau_rise_dqpos = []
tau_perturbs = []
tau_decorrs = []
omega_0s = []

for index, row in agn_dataframe.iterrows():
    bat_id = row['BAT ID']
    dho_params = convert_to_float_list(row['DHO MCMC Fit'])
    timescales = dho_timescales(dho_params)
    
    bat_ids.append(bat_id)
    xis.append(timescales[0])
    tau_decays.append(timescales[1])
    tau_rise_dqpos.append(timescales[2])
    tau_perturbs.append(timescales[3])
    tau_decorrs.append(timescales[4])
    omega_0s.append(timescales[5])
    
timescales_df = pd.DataFrame({'xi': xis, 
                              'tau_decay': tau_decays, 'tau_rise_dqpo': tau_rise_dqpos, 'tau_perturb': tau_perturbs, 'tau_decorr': tau_decorrs, 
                              'omega_0': omega_0s})

agn_dataframe.join(timescales_df).to_csv('adjusted_agn_dataframe.csv')

### Over-damped and QPO's

In [2]:
import numpy as np
import pandas as pd

In [4]:
# read data frame
agn_dataframe = pd.read_csv("adjusted_agn_dataframe.csv")

# determined if overdamped or qpo
xis = agn_dataframe['xi']
oscillations = []

for xi in xis:
    if xi > 1:
        oscillations.append('overdamped')
    elif xi < 1:
        oscillations.append('qpo')
    else:
        oscillations.append('critically damped')
        
agn_dataframe['oscillation'] = oscillations

# save dataframe
agn_dataframe.to_csv('agn_dataframe.csv')

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Filenames,RA,DEC,Times (MJD),Magnitudes,Mag Errors,Best DRW Fit,Best DRW ARMA Fit,...,log_Eratio,Eratio Ref,SNR,xi,tau_decay,tau_rise_dqpo,tau_perturb,tau_decorr,omega_0,oscillation
0,0,0,0.2032_-7.1532_ztfg,0.2,-7.2,[58283.4428935 58286.4428704 58295.4461921 583...,[17.5169697 17.4566288 17.5868244 17.6882477 1...,[0.02475628 0.02392791 0.02578288 0.02741087 0...,[1.01005017 1.39835993],[0.37155817 7.15431992],...,0.00,Swift,682.784148,0.005435,2.692902,0.091962,3.283483e-02,0.144449,68.326007,qpo
1,1,1,0.8643_27.6548_ztfg,0.9,27.7,[58263.4847338 58271.4756597 58274.4529861 582...,[18.0273285 17.6561546 17.9927864 17.9830742 1...,[0.02695774 0.0219428 0.02641839 0.02626968 0...,[1.23339214 1.01005017],[0.4445147 3.61899584],...,,,678.199206,63.743687,14.956146,0.000920,2.361631e-01,23.494505,8.523555,overdamped
2,2,2,1.0082_70.3217_ztfg,1.0,70.3,[58252.467963 58252.4684838 58255.4317361 582...,[20.3061562 20.148098 19.9521751 20.8079872 2...,[0.12992024 0.14134934 0.12475588 0.19658697 0...,[1.05007515 2.75888692],[ 0.3858476 45.03722844],...,,,134.024589,8.219890,116.998949,0.436136,7.167888e+00,184.466599,0.139990,overdamped
3,3,3,1.5813_20.2029_ztfg,1.6,20.2,[58268.4661921 58277.4619213 58280.447419 582...,[14.7221251 14.7285233 14.7703943 14.734273 1...,[0.01065728 0.01065795 0.0106633 0.01065858 0...,[ 1.19507497 53.87507134],[4.33108247e-01 1.85527758e+30],...,-0.67,TN12,1332.673271,21.282623,97.989004,0.054144,3.026407e-01,154.005815,0.434148,overdamped
4,4,4,10.72_-23.541_ztfg,10.7,-23.5,[58330.4594676 58333.4387037 58336.4817477 583...,[15.3464823 15.3872137 15.394475 15.3011103 1...,[0.01968619 0.01972976 0.01973794 0.01964213 0...,[1.14886796 2.24047155],[ 0.41877477 19.22288913],...,-0.42,Swift,798.718882,7.535967,43.502442,0.193208,2.615524e+00,68.636966,0.344930,overdamped
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,382,382,90.5437_28.4721_ztfg,90.5,28.5,[58204.1298495 58204.1723958 58205.12875 582...,[15.1023655 15.1037445 15.0927429 15.0910635 1...,[0.0117217 0.01172357 0.01170872 0.01170646 0...,[ 1.01005017 599.91287317],[0.37155817 inf],...,-1.36,TN12,1280.184352,4.360995,313.305699,4.230454,9.291800e-08,498.784621,0.027468,overdamped
383,383,383,90.6581_65.3712_ztfg,90.7,65.4,[58204.1904861 58207.247662 58207.2566435 582...,[15.5720406 15.8300734 15.7268238 15.872262 1...,[0.01308607 0.01364994 0.01340647 0.01375692 0...,[3.40444911 1.01005017],[0.7454753 2.16878425],...,,,1203.276696,93.848543,69.302788,0.001967,1.007317e+00,108.863656,2.708286,overdamped
384,384,384,91.454_-27.9112_ztfg,91.5,-27.9,[58205.1373843 58386.5221412 58423.4174653 584...,[17.0925713 17.5598412 17.2358112 17.1890182 1...,[0.02607831 0.03267109 0.0278092 0.02721838 0...,[ 1.05730395 63.68998289],[3.88368046e-01 1.10313387e+38],...,-1.53,TN12,640.464361,22.335593,259.333867,0.130089,1.239701e+00,407.565028,0.172167,overdamped
385,385,385,93.9015_71.0375_ztfg,93.9,71.0,[58204.1684606 58204.1900116 58207.2527199 582...,[14.4023151 14.4292612 14.5862532 14.6108074 1...,[0.01216231 0.01217893 0.01228528 0.01230348 0...,[1.05273756 1.01005017],[0.38677801 4.02364412],...,-0.25,Swift,1186.235166,0.003882,0.460534,0.011233,1.259000e+02,0.017645,559.355963,qpo


### Class and Type

In [1]:
import numpy as np
import pandas as pd

In [3]:
catalog = pd.read_csv('BAT_70m_catalog_20nov2012.txt', sep='|')

# removed unamed colemns
catalog = catalog.loc[:, ~catalog.columns.str.contains('^Unnamed')]

# round fit RA and DEC
decimal_places = 1
catalog = round_colemn(catalog, 'CTPT_RA', decimal_places)
catalog = round_colemn(catalog, 'CTPT_DEC', decimal_places)

# create new dataframe with RAs, Decs, and desired data
catalog = pd.DataFrame({'RA': catalog['CTPT_RA'], 'DEC': catalog['CTPT_DEC'],
                       'CL2': catalog['CL2'], 'TYPE': catalog['TYPE']})
catalog.to_csv('catalog.csv')
catalog

Unnamed: 0,RA,DEC,CL2,TYPE
0,0.2,-7.2,2,Galaxy
1,0.4,-77.0,4,Sy1
2,0.6,3.4,4,Sy1.5
3,0.9,27.7,2,Galaxy
4,1.0,70.3,5,Sy2
...,...,...,...,...
1202,358.0,-1.2,8,Sy1N-Galaxy
1203,358.1,58.7,6,AGN
1204,358.2,-17.1,2,galaxy
1205,359.8,-30.6,7,BLLac


In [4]:
# read data frame
agn_dataframe = pd.read_csv("adjusted_agn_dataframe.csv")

# merge dataframes
agn_dataframe = pd.merge(agn_dataframe, catalog, how='left', on=['RA', 'DEC'], validate='one_to_one')

# removed unamed colemns
agn_dataframe = agn_dataframe.loc[:, ~agn_dataframe.columns.str.contains('^Unnamed')]

# save dataframe
agn_dataframe.to_csv('agn_dataframe.csv')