In [None]:
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from astroquery.ipac.ned import Ned
import numpy as np
from astropy.cosmology import WMAP9 as cosmo
from urllib.request import urlretrieve
from collections import Counter
import matplotlib.pyplot as plt
import pickle
from astropy.io import fits
from astropy.table import Table
import glob
import os
from scipy.interpolate import CubicSpline
import textwrap
import seaborn as sns
import scipy.stats as scp

# Global matplotlib styling applied to every figure below: text is rendered
# through LaTeX with a Times serif font, and hatch lines are drawn thick.
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Times"],
    "text.latex.preamble": r"\usepackage{amsmath}\usepackage{mathptmx}",  # Times Roman
    "hatch.linewidth": 3.0,
})
# seaborn "paper" context with fonts scaled up by 1.75
sns.set_context("paper",font_scale=1.75)


def make_desig(data, ra_key='ra', dec_key='dec'):
    """Build short 'J'-prefixed designations from RA/Dec columns given in degrees.

    data: table-like object whose `ra_key` / `dec_key` columns expose `.values`.
    Returns a list with one string per row, assembled from fixed character
    slices of the "hmsdms" coordinate string (RA chars 0-1 and 3-4, Dec chars
    0-2 and 4-5), i.e. presumably 'JHHMM+DDMM' for zero-padded, signed output.
    """
    coords = SkyCoord(data[ra_key].values*u.deg, data[dec_key].values*u.deg)
    names = []
    for hmsdms in coords.to_string("hmsdms"):
        parts = hmsdms.split(' ')
        ra_part, dec_part = parts[0], parts[1]
        # hours+minutes of RA, then sign+degrees+minutes of Dec
        names.append('J' + ra_part[0:2] + ra_part[3:5] + dec_part[0:3] + dec_part[4:6])
    return names

def pos(row):
    """Return a SkyCoord built from the row's 'RA' and 'DEC' entries, taken as degrees."""
    ra_deg = row['RA']*u.deg
    dec_deg = row['DEC']*u.deg
    return SkyCoord(ra=ra_deg, dec=dec_deg)

def load_bigmac():
    """Load the Big MAC catalogue and add a shortened 'Desig' column.

    Reads the CSV at a fixed path, strips the literal "SDSS" from each 'Name1'
    entry and, for names starting with "J", truncates them to the first five
    characters before the sign plus the first four after it (e.g. 'J1215+1344').
    Names that do not start with "J" are kept unchanged.

    Returns the DataFrame with the extra 'Desig' column.
    """
    # read in big  mac
    bigmac = pd.read_csv("/home/insepien/research-data/GFG.csv")
    # format designation: exactly one entry is appended per row so that the
    # list always matches the frame length when assigned as a column
    desigs = []
    for raw_name in bigmac['Name1']:
        name = raw_name.replace("SDSS","")
        if not name.startswith("J"):
            desigs.append(name)
            continue
        for sign in ("+", "-"):
            if sign in name:
                pieces = name.split(sign)
                desigs.append(pieces[0][:5] + sign + pieces[1][:4])
                break
        else:
            # "J" name without a declination sign: report it, but keep the
            # name as-is instead of dropping the row from the list
            print(name)
            desigs.append(name)
    bigmac['Desig'] = desigs
    # optionally can get decals images
    # for n in mul_bm.index:
    #     urlretrieve('http://legacysurvey.org/viewer/jpeg-cutout?ra='+str(mul_bm.loc[n,'RA'])+'&dec='+str(mul_bm.loc[n,'DEC'])+'&layer=decals-dr7&pixscale=0.27&bands=grz',
    #                 "/home/insepien/research-data/hst/mul_bm/"+str(mul_bm.loc[n,'DESIG'])+'.jpg')
    return bigmac

def cal_sep(theta, z):
    """Convert an angular separation `theta` (arcsec) at redshift `z` into kpc.

    Uses the small-angle product of the angle in radians and the module-level
    cosmology's angular diameter distance; returns an astropy Quantity in kpc.
    """
    theta_rad = (theta*u.arcsec).to(u.rad).value
    d_ang = cosmo.angular_diameter_distance(z)
    return (d_ang*theta_rad).to(u.kpc)


def f(on, theta):
    """Show the cutout image for object `on` with a circle of radius `theta`.

    on: object name; the image is read from the fixed mul_bm directory as
        '<on>.jpg'.
    theta: separation in arcsec, converted to pixels with a fixed plate scale.

    The circle is centred on the image centre and labelled with `theta`.
    """
    fn = "/home/insepien/research-data/hst/mul_bm/"+on+".jpg"
    # NOTE(review): the commented-out download snippet in load_bigmac requests
    # pixscale=0.27 — confirm which scale the stored cutouts actually have
    decals_plate_scale = 0.236 #''/pix
    pix_sep = theta/decals_plate_scale

    fig,ax = plt.subplots()
    im = plt.imread(fn)
    # imshow puts columns on x and rows on y, so take each centre from its own axis
    mid_x = im.shape[1]/2
    mid_y = im.shape[0]/2
    ax.imshow(im)
    circ = plt.Circle((mid_x,mid_y), pix_sep, fill=False, color='white',alpha=0.5,label=f"{theta:.2f}''")
    ax.add_patch(circ)
    ax.legend()
    ax.set_title(on)

def lbol_to_m(lbol,edd_rate=0.3):
    """Return log10 of the mass implied by luminosity `lbol` at Eddington ratio `edd_rate`.

    The luminosity is divided by `edd_rate` and then by 1.28e46/1e8 (the
    luminosity per unit mass used here; presumably erg/s per solar mass).
    """
    lum_per_mass = 1.28e46/1e8
    return np.log10((lbol/edd_rate)/lum_per_mass)

def m_to_lbol(m,edd_rate=0.3):
    """Return log10 of the luminosity of mass `m` radiating at Eddington ratio `edd_rate`.

    Note that `m` is linear (not a log), scaled as 1.28e46 per 1e8 mass units.
    """
    eddington = 1.28e46*m/1e8
    return np.log10(eddington*edd_rate)

load data

In [None]:
# main catalogue used throughout the notebook
alpaka = pd.read_pickle("/home/insepien/research-data/alpaka/alpaka_z05_merged_wise.pkl")
# alpaka_fullZ = Table(fits.getdata("/home/insepien/research-data/alpaka/ALPAKA_v1_withDes.fits")).to_pandas()
magel_o = pd.read_pickle("/home/insepien/research-data/alpaka/magellan/alpaka_39fits.pkl")
# catalogue rows whose RA and DEC both occur in the Magellan table
# NOTE(review): RA and DEC are tested independently rather than as pairs — confirm no unrelated row can slip in
magel = alpaka[(alpaka['RA'].isin(magel_o['RA'])) & (alpaka['DEC'].isin(magel_o['DEC']))]
bigmac = load_bigmac()
# separate dual and singles
dualnames = ["J1215+1344","J1222-0007"]
singlenames =  magel[~ magel['Desig'].isin(dualnames)]['Desig']
duals = alpaka[alpaka['Desig'].isin(dualnames)]
singles = alpaka[alpaka['Desig'].isin(singlenames)]
print(duals.shape,singles.shape)
# some masks
j1010mask = alpaka['Desig'] == "J1010+1413"
j1000mask  = alpaka['Desig'] == "J1000+1242"
# bright subsample: 600 x dereddened [OIII] luminosity above 1e46 and 0.14 < Z < 0.22
mask171 = ((alpaka['OIII_5007_LUM_DERRED']*600 > 1e46) & (alpaka['Z']> 0.14) & (alpaka['Z']<0.22))


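Define functions that fit a straight line to the four WISE band flux densities and derive L_IR. The bolometric correction is fixed at 10% (L_bol = L_IR / 0.1), except at a rest wavelength of 15 microns, where the tabulated correction is used.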

In [None]:
from astropy import constants as const
def get_wise_mags(wise_):
    """Collect the four WISE profile-fit magnitudes and their errors.

    wise_: mapping/DataFrame with 'w<N>mpro' and 'w<N>sigmpro' entries, N=1..4.
    Returns (mags, errs), each an array stacked in band order W1, W2, W3, W4.
    """
    bands = ('w1', 'w2', 'w3', 'w4')
    wmags_ = np.array([wise_[band + 'mpro'] for band in bands])
    wmags_err_ = np.array([wise_[band + 'sigmpro'] for band in bands])
    return wmags_, wmags_err_

def wise_lum_from_mag_rest_frame(wmags_, wmags_err_, rest_wavelength_, redshift_):
    """Estimate nu*L_nu at a rest-frame wavelength from the four WISE magnitudes.

    wmags_, wmags_err_: W1..W4 magnitudes and magnitude errors for one source.
    rest_wavelength_: rest-frame wavelength in microns.
    redshift_: source redshift.

    A weighted straight line is fitted to flux density versus band wavelength,
    evaluated at the observed wavelength (1+z)*rest, and converted to a
    luminosity with the module-level cosmology. Returns a Quantity in erg/s.
    """
    # zero-magnitude flux densities and band wavelengths (microns) for W1..W4
    # -- http://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#example
    zero_points = np.array([309.540, 171.787, 31.674, 8.363])*u.Jy
    band_wavelengths = np.array([3.4, 4.6, 12., 22.])
    flux = zero_points*10**(-wmags_/2.5)
    # NOTE(review): this converts the magnitude *error* as if it were a magnitude;
    # the usual propagation is flux * 0.4*ln(10) * mag_err — confirm the intended weights
    flux_err = zero_points*10**(-wmags_err_/2.5)
    # straight-line fit through the four bands, weighted by 1/flux_err
    line = np.polyfit(band_wavelengths, flux.value,1, w=1./flux_err)
    lam_obs = (1 + redshift_) * rest_wavelength_
    flux_at_obs = (line[0]*lam_obs+line[1])*u.Jy
    nu_obs = (const.c/(lam_obs*u.micron)).to(u.Hz)
    d_lum = cosmo.luminosity_distance(redshift_)
    return (flux_at_obs*nu_obs*4*np.pi*d_lum**2).to(u.erg/u.s)

def correct_ir():
    """Return a CubicSpline mapping the 'LIRs' column to the 'Lbols' column of bc.txt.

    The file is a space-separated table whose first line is the header.
    Callers in this file evaluate the spline on log10 luminosities, so the
    columns are presumably logarithmic (Hopkins+20 correction) — TODO confirm.
    """
    # load hopkins bolometric correction
    with open("/home/insepien/research-data/pop-result/bc.txt","r") as fh:
        lines = fh.read().splitlines()
    header = lines[0].split(' ')
    rows = [line.split(' ') for line in lines[1:]]
    hopkins = pd.DataFrame(rows,columns=header)
    lbol_col = np.array(hopkins['Lbols'].tolist(), dtype=float)
    lir_col = np.array(hopkins['LIRs'].tolist(), dtype=float)
    return CubicSpline(lir_col, lbol_col)

def get_wise_ir_lums(cat,wl_=22):
    """Compute the WISE IR luminosity and a bolometric luminosity for each row of `cat`.

    cat: DataFrame with the WISE magnitude/error columns read by
         `get_wise_mags` and a 'Z' redshift column.
    wl_: rest-frame wavelength in microns at which the luminosity is evaluated.

    Returns (wmags, wise_lums, irbol): the stacked magnitudes, the luminosities
    in row order, and the bolometric luminosities (spline correction from
    `correct_ir` when wl_ == 15, otherwise L_IR / 0.1).
    """
    ## get wise mags and errors
    wmags, wmags_err_nan = get_wise_mags(cat)
    # replace nan values in mag error with the median of the valid errors.
    # `nan=` must be passed by keyword (the second positional argument of
    # nan_to_num is `copy`), and nanmedian is needed because a plain median
    # of data containing NaN is itself NaN.
    wmags_err = np.nan_to_num(wmags_err_nan, nan=np.nanmedian(wmags_err_nan))
    # calculate luminosity; iterate by position so that frames whose index is
    # not 0..n-1 (e.g. filtered subsets) fill the output array correctly
    redshifts = cat['Z'].to_numpy()
    wise_lums = np.zeros(len(cat))
    for k, z in enumerate(redshifts):
        wise_lums[k] = wise_lum_from_mag_rest_frame(wmags[:,k], wmags_err[:,k], wl_, z).value
    # check wavelength to see how to do bolo correction
    if wl_==15: # use Hopkins+2020 if at 15 microns
        spl = correct_ir()
        irbol = 10**(spl(np.log10(wise_lums)))
    else: # else assume L_IR is 10% of L_bol
        irbol = wise_lums/0.1
    return wmags, wise_lums,irbol

define functions to interpolate 3 wise bands

In [None]:
from astropy import constants as const
def get_wise_mags_3band(wise_):
    """Collect the W2, W3 and W4 profile-fit magnitudes and their errors.

    wise_: mapping/DataFrame with 'w<N>mpro' and 'w<N>sigmpro' entries, N=2..4.
    Returns (mags, errs), each an array stacked in band order W2, W3, W4.
    """
    bands = ('w2', 'w3', 'w4')
    wmags_ = np.array([wise_[band + 'mpro'] for band in bands])
    wmags_err_ = np.array([wise_[band + 'sigmpro'] for band in bands])
    return wmags_, wmags_err_

import scipy.interpolate as intp
def wise_lum_from_mag_rest_frame_3band_interp(wmags_, wmags_err_, rest_wavelength_, redshift_):
    """Estimate nu*L_nu at a rest-frame wavelength from the W2, W3, W4 magnitudes.

    wmags_: W2..W4 magnitudes for one source.
    wmags_err_: accepted for signature compatibility; the interpolation passes
        exactly through the three bands, so the errors are not used.
    rest_wavelength_: rest-frame wavelength in microns.
    redshift_: source redshift.

    A quadratic (k=2) interpolating spline in log10(wavelength)-log10(flux
    density) is evaluated at the observed wavelength (1+z)*rest and converted
    to a luminosity with the module-level cosmology. Returns a Quantity in erg/s.
    """
    ## change mags to fluxes -- http://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#example
    obs_wavelength_ = (1 + redshift_) * rest_wavelength_
    zeromagflux = np.array([171.787, 31.674, 8.363])*u.Jy
    fluxdens = zeromagflux*10**(-wmags_/2.5) # in Jy
    wise_wavelengths = np.array([4.6, 12., 22.]) # 1e-6 m
    ## interpolate spline through the three bands in log-log space
    bspl = intp.make_interp_spline(np.log10(wise_wavelengths), np.log10(fluxdens.value),k=2)
    ## get flux at obs wavelength from the spline
    obs_flux = 10**(bspl(np.log10(obs_wavelength_)))*u.Jy
    obs_hz = (const.c/(obs_wavelength_*u.micron)).to(u.Hz)
    lum = (obs_flux*obs_hz*4*np.pi*
           cosmo.luminosity_distance(redshift_)**2).to(u.erg/u.s)
    return lum

def correct_ir():
    """Return a CubicSpline mapping the 'LIRs' column to the 'Lbols' column of bc.txt.

    Same definition as the one given earlier in the notebook; this later
    definition is the one in effect once this cell has run. The file is a
    space-separated table whose first line is the header.
    """
    # load hopkins bolometric correction
    with open("/home/insepien/research-data/pop-result/bc.txt","r") as fh:
        lines = fh.read().splitlines()
    header = lines[0].split(' ')
    rows = [line.split(' ') for line in lines[1:]]
    hopkins = pd.DataFrame(rows,columns=header)
    lbol_col = np.array(hopkins['Lbols'].tolist(), dtype=float)
    lir_col = np.array(hopkins['LIRs'].tolist(), dtype=float)
    return CubicSpline(lir_col, lbol_col)

def get_wise_ir_lums_3band_intp(cat,wl_=22):
    """Compute 3-band-interpolated WISE IR and bolometric luminosities for `cat`.

    cat: DataFrame with the WISE magnitude/error columns read by
         `get_wise_mags_3band` and a 'Z' redshift column.
    wl_: rest-frame wavelength in microns at which the luminosity is evaluated.

    Rows whose spline cannot be built (non-finite magnitudes) get a luminosity
    of 0 and their index label is printed. Returns (wmags, wise_lums, irbol),
    with irbol from the `correct_ir` spline when wl_ == 15, else L_IR / 0.1.
    """
    ## get wise mags and errors
    wmags, wmags_err_nan = get_wise_mags_3band(cat)
    # replace nan values in mag error with the median of the valid errors
    # (`nan=` must be a keyword: the second positional argument is `copy`)
    wmags_err = np.nan_to_num(wmags_err_nan, nan=np.nanmedian(wmags_err_nan))
    # calculate luminosity; iterate by position so non-default indexes work
    wise_lums = np.zeros(len(cat))
    for k, (label, z) in enumerate(cat['Z'].items()):
        try:
            wise_lums[k] = wise_lum_from_mag_rest_frame_3band_interp(wmags[:,k], wmags_err[:,k], wl_, z).value
        except (ValueError, np.linalg.LinAlgError):
            # spline construction rejects NaN/inf input; any other error is a
            # real bug and is deliberately allowed to propagate
            wise_lums[k] = 0
            print(label)
    # check wavelength to see how to do bolo correction
    if wl_==15: # use Hopkins+2020 if at 15 microns
        spl = correct_ir()
        irbol = 10**(spl(np.log10(wise_lums)))
    else: # else assume L_IR is 10% of L_bol
        irbol = wise_lums/0.1
    return wmags, wise_lums,irbol

check interpolation

In [None]:
wmag, werr = get_wise_mags_3band(alpaka)
# Diagnostic variant of the 3-band interpolation. It has its own name so that
# it does not replace wise_lum_from_mag_rest_frame_3band_interp, which
# get_wise_ir_lums_3band_intp still needs to return a luminosity Quantity.
def check_3band_interp(wmags_, wmags_err_, rest_wavelength_, redshift_):
    """Return (band flux densities, fitted spline, interpolated flux density).

    Same log-log quadratic interpolation through W2, W3, W4 as the luminosity
    function, but returning the intermediate pieces for plotting.
    `wmags_err_` is accepted but not used.
    """
    ## change mags to fluxes -- http://wise2.ipac.caltech.edu/docs/release/allsky/expsup/sec4_4h.html#example
    obs_wavelength_ = (1 + redshift_) * rest_wavelength_
    zeromagflux = np.array([171.787, 31.674, 8.363])*u.Jy
    fluxdens = zeromagflux*10**(-wmags_/2.5) # in Jy
    wise_wavelengths = np.array([4.6, 12., 22.]) # 1e-6 m
    ## interpolate spline
    bspl = intp.make_interp_spline(np.log10(wise_wavelengths), np.log10(fluxdens.value),k=2)
    ## get flux at obs wavelength from the spline
    obs_flux = 10**(bspl(np.log10(obs_wavelength_)))*u.Jy
    return fluxdens.value, bspl, obs_flux.value

errind = []
res = []
for i in alpaka.index.values:
    z = alpaka.loc[i,'Z']
    try:
        res.append(check_3band_interp(wmag[:,i], werr[:,i], 15, z))
    except ValueError:
        # spline construction rejects non-finite magnitudes; keep a placeholder
        res.append([0,0,0])
        errind.append(i)

spl = [r[1] for r in res]
flxd = [r[0] for r in res]
obsflx = [r[2] for r in res]
obswl = [(1+z)*15 for z in alpaka['Z'].values]


In [None]:
# Visual check of the interpolation for the first five entries of the result lists
wwl = np.array([4.6, 12., 22.])
wwl_fine = np.linspace(wwl.min(),wwl.max(),20)
fig,ax = plt.subplots(1,5,figsize=(12,2))
for i in range(5):
    # band flux densities
    ax[i].scatter(np.log10(wwl),np.log10(flxd[i]))
    # fitted spline, evaluated on a fine wavelength grid (the spline already returns log flux density)
    ax[i].plot(np.log10(wwl_fine), spl[i](np.log10(wwl_fine)))
    # interpolated value at the observed wavelength
    ax[i].scatter(np.log10(obswl[i]),np.log10(obsflx[i]))
    ax[i].set_ylabel("log flx dens")
    ax[i].set_xlabel("log wlen")
fig.tight_layout();

calculate luminosity of whole alpaka here and compare

In [None]:
# linear fit flxds
wmags15, wise_lums15,irbol15 = get_wise_ir_lums(alpaka,15)
wmags22, wise_lums22,irbol22 = get_wise_ir_lums(alpaka,22)
# 2nd order interpolate
wmags15_3b, wise_lums15_3b,irbol15_3b = get_wise_ir_lums_3band_intp(alpaka,15)

$\nu L_{\nu}$

In [None]:
# 3-band interpolated rest-frame 15 micron luminosity of J1010+1413
np.array(wise_lums15_3b)[j1010mask]

In [None]:
# frequencies corresponding to the observed wavelengths and to 15 microns
obshz = (const.c/obswl/u.micron).to(u.Hz)
emhz = (const.c/15/u.micron).to(u.Hz)

In [None]:
# [OIII] luminosity against rest-frame 15 micron WISE luminosity for Z < 0.2
zmask = alpaka['Z']<0.2
plt.scatter(alpaka['OIII_5007_LUM'][zmask],wise_lums15_3b[zmask],label='rest15,spline',s=1)
# plt.scatter(alpaka['OIII_5007_LUM'][zmask],wise_lums15[zmask],label='linear',s=1,alpha=0.5)
# plt.scatter(alpaka['OIII_5007_LUM'][zmask],alpaka['wiseLum'][zmask],label='observed',s=1,alpha=0.5)
# hand-placed guide lines (a diagonal plus one vertical and one horizontal threshold)
plt.plot([1e40,2e43],[3.5e42,1e46])
plt.axvline(1.5e41)
plt.axhline(5e43)

plt.yscale('log')
plt.xscale('log')
plt.xlim(1e40,2e43)
plt.ylim(1e42,1e46)
plt.xlabel("L[OIII]")
plt.ylabel("Wise $\\nu L_{\\nu}$ (rest frame)")
plt.legend();

snap stuffs

In [None]:
# rows not among the listed, already-observed designations
notObserved = ~ alpaka['Desig'].isin(['J1000+1242', 'J1010+1413','J1352+6541', 'J1356+1026','J1222-0007'])
# existing selection: log10 of the catalogue 'irbol' column above 45.9
snap = (np.log10(alpaka['irbol'])>45.9) & notObserved
# rows at least as luminous (rest-frame 15 micron estimate) as the faintest existing selection member
snapminrest = irbol15_3b >= irbol15_3b[snap].min()
# trial threshold on the rest-frame 15 micron bolometric estimate
newL = 45.769
newcut = (np.log10(irbol15_3b) >= newL) & notObserved

# overlap with the existing selection, size of the new selection, log mass at the threshold
(newcut & snap).sum(), newcut.sum(), lbol_to_m(10**newL)

In [None]:
# one-to-one reference line
plt.plot(np.arange(44,48),np.arange(44,48),c='k',alpha=0.5)
plt.scatter(np.log10(irbol15_3b), np.log10(alpaka['irbol']),label='all type-2')
plt.scatter(np.log10(irbol15_3b)[snap], np.log10(alpaka['irbol'])[snap],label='SNAP 146')
plt.ylim(45.5,47)
plt.xlim(45.5,47)
# draw the cut at the threshold actually used for `newcut` and quoted in the label
plt.axvline(newL,label=f'new cut $L>${newL},\nMbh $>${lbol_to_m(10**newL):.3f},\n{(newcut & snap).sum()}/146 in snap')
plt.axvline(np.log10(irbol15_3b[snap].min()),c='r',label='SNAP old min',alpha=0.5)
plt.xlabel("Log rest frame 15, spline")
plt.ylabel('Log observed 22')
plt.legend();

cross-match with Big MAC for paper intro

In [None]:
# get some stats on sub-kpc pairs for science justification
not_recoil = bigmac['Primary System Type']!='Recoil Candidate'
subkpc_mask = not_recoil & (bigmac['Sep(kpc)']<1) & (bigmac['Sep(kpc)']>0.3)
kpc_mask = not_recoil & (bigmac['Sep(kpc)']>1)
dual_mask = subkpc_mask | kpc_mask
confirmed_mask = dual_mask & ~bigmac["Primary System Type"].str.contains("Candidate")

# fractions
print(f"fraction of sub-kpc/total dual = {subkpc_mask.sum()}/{dual_mask.sum()} = {subkpc_mask.sum()/dual_mask.sum():.3f}")
print(f"fraction of confirmed/total dual = {confirmed_mask.sum()/dual_mask.sum():.3f}")

# get some methods of measuring sub-kpc sep
anyl_meth = Counter(bigmac[subkpc_mask]['Parsed Analysis Method'])
print("1st most common method: ",anyl_meth.most_common(2)[0])
print("2nd most common method: ",anyl_meth.most_common(2)[1])

check airmass and coords stuffs

In [None]:
from astroplan import Observer, FixedTarget
from astropy.time import Time

twi_end = Time('2020-02-08 0:48:00') ## local time 21:48
twi_beg = Time('2020-02-08 9:06:00') ## local time 6:06
start = twi_end
end = twi_beg
magellan = Observer.at_site("Las Campanas Observatory")
d = alpaka[mask171]
targets = FixedTarget(coord=SkyCoord(d['RA'].values*u.deg,d['DEC'].values*u.deg))
# NOTE(review): one time per target (same length as `d`), so each target may be
# tested at a different instant of the night — confirm this is intended
time_window = start + (end-start)*np.linspace(0,1,len(d))
is_up = magellan.target_is_up(time_window,targets)

fig, ax = plt.subplots(1,2,figsize=(12,4))
ax[0].scatter(d[is_up]['RA'],d[is_up]['DEC'],label=f'{np.sum(is_up)}/{len(d)}\n up at magellan')
ax[0].scatter(magel['RA'],magel['DEC'], label='39 observed')
ax[0].axhline(30)
ax[0].set_xlabel("RA")
ax[0].set_ylabel("DEC")
ax[0].legend()

airmass_time_window = start + (end-start)*np.linspace(0,1,10)
dec = np.array([30,50,60])
coords = SkyCoord(200*u.deg,dec*u.deg)
# named `altaz_tracks` so the helper function `pos` defined earlier is not overwritten
altaz_tracks = [magellan.altaz(airmass_time_window, SkyCoord(200*u.deg,d_deg*u.deg)) for d_deg in dec]

# altitude through the night at RA = 200 deg for each test declination,
# drawn explicitly on the right-hand panel
for i in range(3):
    ax[1].plot(altaz_tracks[i].alt, label=f"DEC = {dec[i]} deg")
ax[1].axhline(0,alpha=0.5)
ax[1].set_xlabel("$\\approx$ time")
ax[1].set_ylabel("Altitude")
ax[1].legend()
fig.tight_layout();

sample plot of IR only for methods section

In [None]:
def norm_hist(ax,quant,ecolor,fcolor,bin_arr=None,alpha=1,horz=False,hatchsym=''):
    """Draw a histogram of `quant` on `ax`, normalised so the bar heights sum to 1.

    ax: axes-like object providing `bar` / `barh`.
    quant: values to histogram.
    ecolor, fcolor: edge and face colours of the bars.
    bin_arr: bins passed to `np.histogram`; None (or an empty sequence) falls
        back to numpy's default of 10 equal-width bins.
    alpha: bar opacity.
    horz: if True draw horizontal bars (`barh`), otherwise vertical (`bar`).
    hatchsym: hatch pattern for the bars.
    """
    # an empty bin list cannot be histogrammed; treat it like "not given"
    if bin_arr is None or (np.ndim(bin_arr) > 0 and np.size(bin_arr) == 0):
        bin_arr = 10
    count, edges = np.histogram(quant,bins=bin_arr)
    frac = count/np.sum(count)
    style = dict(align='edge',edgecolor=ecolor,facecolor=fcolor,alpha=alpha,
                 hatch=hatchsym,hatch_linewidth=0.5)
    if horz:
        ax.barh(edges[:-1],frac,height=np.diff(edges),**style)
    else:
        ax.bar(edges[:-1],frac,width=np.diff(edges),**style)
        


# 2x2 layout: main scatter (bottom-left) with marginal histograms on top and right
fig,ax = plt.subplots(2,2,gridspec_kw={'width_ratios': [2,0.5],'height_ratios': [0.7,2]},figsize=(8,6),
                      sharey='row',sharex='col',dpi=200)
plt.subplots_adjust(wspace=0.05,hspace=0.05)

mdual_labs = ['J1215+1344','J1222-0007 W', 'J1222-0007 E']

# scatter type-2 in z=0.1-0.5 with match in wise
ax[1,0].scatter(alpaka['Z'],np.log10(alpaka['irbol']),s=2,alpha=0.1,color="plum")
# same as above but Lbol OIII > 1e46
ax[1,0].scatter(alpaka['Z'][mask171],np.log10(alpaka['irbol'][mask171]),s=7,color="darkseagreen",marker="x")
# magellan sample
ax[1,0].scatter(singles['Z'],np.log10(singles['irbol']),s=50,marker='2',color='indigo',alpha=0.7)
# only entries 0 and 2 of the duals table are drawn
[ax[1,0].scatter(duals['Z'].values[i],np.log10(duals['irbol'].values[i]),
                 s=40,marker=['s',"",'o'][i],color='indigo',alpha=0.7,label=mdual_labs[i]) for i in [0,2]];

ax[1,0].set_ylim(43.5,47.5)
ax[1,0].set_xlim(0.09,0.3)
ax[1,0].set_xlabel("Redshift")
# the exponent must be braced: `^-1` would superscript only the minus sign
ax[1,0].set_ylabel('Log($L_{\\rm{bol,~IR}}$) $[\\rm{erg~s}^{-1}]$')
ax[1,0].legend(fontsize=10)

# hist z
binz = np.linspace(np.min(alpaka['Z']),np.max(alpaka['Z']),20)
norm_hist(ax[0,0],alpaka['Z'],"plum",'none',bin_arr=binz)
norm_hist(ax[0,0],alpaka['Z'][mask171],"darkseagreen",'none',bin_arr=binz,hatchsym="/")
norm_hist(ax[0,0],np.concatenate([singles['Z'],duals['Z']]),'none','indigo',bin_arr=binz,alpha=0.5)
ax[0,0].set_ylabel("Fraction")

# hist Lbol
binL = np.linspace(np.log10(np.min(alpaka['irbol'])), np.log10(np.max(alpaka['irbol'])),10)
norm_hist(ax[1,1],np.log10(alpaka['irbol'].dropna()),'plum',"none",binL,horz=True)
norm_hist(ax[1,1],np.log10(alpaka['irbol'].dropna()[mask171]),'darkseagreen',"none",binL,horz=True,hatchsym="/")
norm_hist(ax[1,1],np.log10(np.concatenate([singles['irbol'],duals['irbol']])),'none','indigo',binL,alpha=0.5,horz=True)
# put the fraction axis of the right-hand histogram on top
ax_top = ax[1,1].secondary_xaxis("top")
ax_top.set_xlabel("Fraction")
ax_top.set_xticks([0,0.2])
ax_top.set_xticklabels([0,0.2])
ax[1,1].set_xticks([])

ax[0,1].axis('off');

In [None]:
# side-by-side comparison of the [OIII]-based (left) and IR-based (right) bolometric luminosity against redshift
fig, ax = plt.subplots(1,2,figsize=(12,4),sharey=True,sharex=True,dpi=500)
ax[0].scatter(alpaka['Z'],np.log10(alpaka['OIII_5007_LUM_DERRED']*800),s=2,alpha=0.1,color='plum')
ax[0].scatter(magel['Z'],np.log10(magel['OIII_5007_LUM_DERRED']*800),s=50,alpha=0.5,marker="2",color='indigo')
mdual_labs = ['J1215+1344','J1222-0007 W', 'J1222-0007 E']
[ax[0].scatter(duals['Z'].values[i],np.log10(duals['OIII_5007_LUM_DERRED'].values*800)[i],s=40,marker=['s','o',"^"][i],color='indigo',alpha=0.5,label=mdual_labs[i]) for i in range(3)];

ax[1].scatter(alpaka['Z'],np.log10(alpaka['irbol']),s=2,alpha=0.1,color='plum')
ax[1].scatter(singles['Z'],np.log10(singles['irbol']),s=50,marker='2',color='indigo',alpha=0.5)
[ax[1].scatter(duals['Z'].values[i],np.log10(duals['irbol'].values)[i],s=40,marker=['s','o','^'][i],color='indigo',alpha=0.5,label=[mdual_labs[0],"",mdual_labs[2]][i]) for i in [0,2]];

ax[0].scatter(alpaka[j1010mask]['Z'],np.log10(alpaka[j1010mask]['OIII_5007_LUM_DERRED']*800),marker="x",color="k",label='J1010+1413')
ax[1].scatter(alpaka[j1010mask]['Z'],np.log10(alpaka[j1010mask]['irbol']),marker="x",color="k",label='J1010+1413')

ax[0].set_ylim(43,48)
ax[0].set_xlim(0.09,0.3)
[a.set_xlabel("Redshift") for a in ax]
# exponents are braced so that LaTeX superscripts the whole "-1"
[ax[i].set_ylabel(['Log($L_{\\rm{bol,~[OIII]~dered}}$) $[\\rm{erg~s}^{-1}]$','Log($L_{\\rm{bol,~IR}}$) $[\\rm{erg~s}^{-1}]$'][i]) for i in range(2)]
[ax[i].legend(fontsize=10) for i in range(2)]
# trailing semicolon suppresses the cell output (a `;` on a line of its own is a syntax error)
[ax[i].text(0.025, 0.975, ["[OIII]","IR"][i], transform=ax[i].transAxes, ha='left', va='top',fontsize=20) for i in range(2)];

Check emission-line luminosities: based on the dereddened [OIII] luminosity, J1010 is dimmer than the rest of the sample, yet it is brighter than the rest in the SDSS r-band. The r-band covers [OIII] and Hβ, so it should be [OIII]-dominated. The dereddened [OIII] measurements therefore do not make sense.

In [None]:
#### check emission lines
j10 = alpaka[alpaka['Desig'] == "J1010+1413"]
keys = ['HB_LUM',
 'OIII_4959_LUM',
 'OIII_5007_LUM',
 'NII_6548_LUM',
 'HA_LUM',
 'NII_6584_LUM',
 'NVS_LUM']
i=0
clr = sns.color_palette("colorblind", len(keys))
for k in keys:
    lums = [m for m in magel[k] if np.isfinite(m) and m!=0]
    plt.hist(np.log10(lums),color=clr[i],alpha=0.7,label=k)
    try:
        plt.axvline(np.log10(j10[k].values),c=clr[i])
    except:
        print(k)
    i+=1
plt.xlim((37.5,43.5))
plt.legend(bbox_to_anchor=(1,1))
plt.title("J1010 is generally brighter than magellan sample");

code below to make dual sep vs redshift plot (used for hst phase 1)

In [None]:
from matplotlib.lines import Line2D
import matplotlib.patches as patches
sns.set_context("paper",font_scale=1.75)
sns.set_style('ticks')
sns.set_palette('colorblind')
figparams = {'font.family': 'DejaVu Sans',
            'font.serif':'Times',
            'hatch.linewidth' : 3.0}
plt.rcParams.update(figparams)

# NOTE(review): `dual` and `subkpc_dual` are not defined in the part of the
# notebook visible here — confirm they exist before running this cell
fig,ax = plt.subplots(figsize=(15,7),dpi=500)
# plot hst resolution limit
for reso in [1,5,20]:
    seeing = 0.04*2.5*reso
    ax.plot(np.linspace(0,3),cal_sep(seeing, np.linspace(0,3)),c='k',linestyle="--",alpha=0.5)
    ax.text(2.5, 1*reso, f"{seeing:.1f} arcsec", color='k',fontsize=15)

# add survey vol
# rect = patches.Rectangle((0.1, 0), 0.4-0.1, 1, linewidth=2, edgecolor='w', facecolor='darkseagreen',alpha=0.3)
# ax.add_patch(rect)
# ax.text(0.2,1,"Our survey",c="darkolivegreen",fontsize=15)

# kpc points ie sep>1 with confidence>0.5, z0-3, have been imaged
conf_kpc = dual['ST1 Confidence Flag']>=0.5
imag_kpc = dual['Parsed Analysis Method'].str.contains("Imaging")
z_mask = (dual['z1']>0)&(dual['z1']<3)
kpcdf = dual[conf_kpc&imag_kpc&z_mask]
# the 15 most frequently cited papers each get their own marker and colour
paper = list(kpcdf['Paper(s)'].str.split(" ; ").explode().value_counts().keys())[:15]
all_markers = list(Line2D.markers.keys())[5:]
all_colors = sns.color_palette("colorblind", len(paper))
for j in range(len(paper)):
    papermask = np.array([np.isin(paper[j],kpcdf.loc[i,'Paper(s)'].split(" ; ")).item() for i in kpcdf.index.values])
    for k in kpcdf[papermask].index.values:
        # label only the first point of each paper so the legend has one entry per paper
        ulabel = paper[j] if k==kpcdf[papermask].index.values[0] else None
        plt.scatter(kpcdf[papermask]['z1'][k],kpcdf[papermask]['Sep(kpc)'][k],marker=all_markers[j],c=all_colors[j],s=30,label=ulabel)

# plot points with confidence >0.5 and have been imaged
confidence_mask = subkpc_dual['ST1 Confidence Flag']>=0.5
imaging_mask  = subkpc_dual['Parsed Analysis Method'].str.contains("Imaging")
distance_mask = subkpc_dual['Sep(kpc)']>0.2  # to remove radio sources
df = subkpc_dual[confidence_mask&imaging_mask&distance_mask]
# hand-entered log AGN luminosities, paired in order with the rows of `df`
lagn = np.log10([0.033e44,0.135e44,3e44,10**43.23*600,6e46])
subkpc_markers = list(Line2D.markers.keys())[1:6]
for i,m,mrk in zip(df.index.to_list(),lagn,subkpc_markers): # mark confirmed and very sure candidates differently
    wrapped_label = "\n".join(textwrap.wrap(df['Paper(s)'][i], width=60))
    sca = ax.scatter(df['z1'][i], df['Sep(kpc)'][i],label=wrapped_label,s=30,cmap='magma',c=m,marker=mrk,vmin=np.min(lagn)-0.5,vmax=np.max(lagn)+1)

cbar_ax = fig.add_axes([0.99, 0.125, 0.01, 0.75])
cbar = fig.colorbar(sca,cax=cbar_ax)
cbar.set_label("Log($L_{AGN}$) [erg/s]")

ax.set_xlabel("Redshift")
ax.set_ylabel("Projected separation [kpc]")
ax.set_xlim((-0.01,3))
ax.set_ylim((0.2,110))
# set top xlabel to look back time
ax_top = ax.secondary_xaxis("top")
ax_top.set_xlabel("Lookback time [Gyr]")
# interpolate to get tick positions for round lookback time.
# The redshift grid spans the full plotted range (0-3) so that every
# requested lookback time is interpolated rather than extrapolated.
z_grid = np.linspace(0,3,200)
spl = CubicSpline(cosmo.lookback_time(z_grid).value,z_grid)
xtick_pos = spl(np.arange(1,12,2))
ax_top.set_xticks(xtick_pos)
ax_top.set_xticklabels(np.arange(1,12,2))
ax.set_yscale('log')

ax.legend(ncol=3,fontsize=8,loc='lower center')
ax.grid(linestyle='--',alpha=0.5)
fig.tight_layout()
fig.savefig("hst.png",dpi=500);

compare OIII and IR for JA

In [None]:
hst_sdss152 = pd.read_pickle("/home/insepien/research-data/alpaka/snap/hstP2_snap151_alpakaWithSDSS.pkl")
# remove already-observed targets (five designations are listed)
# NOTE(review): the variable names say 152/148 and the file name says 151 — confirm the actual counts from the printed shape
observed_mask = hst_sdss152['Desig'].isin(['J1000+1242', 'J1010+1413','J1352+6541', 'J1356+1026','J1222-0007'])
hst_sdss148 = hst_sdss152[~observed_mask]
hst_sdss148.reset_index(inplace=True,drop=True)
print(f"final sample shape: {hst_sdss148.shape}")
# catalogue rows whose RA and DEC both occur in the remaining SNAP table (matched independently, not as pairs)
snap_mask = (alpaka['RA'].isin(hst_sdss148['RA'])) & (alpaka['DEC'].isin(hst_sdss148['DEC']))


In [None]:
# L_IR against observed (left) and dereddened (right) [OIII] luminosity
fig,ax = plt.subplots(1,2,figsize=(10,4),sharex=True,dpi=300)
zm = alpaka['Z'] < 0.2  # defined here but not used in this cell
ax[0].scatter(alpaka['OIII_5007_LUM'], alpaka['wiseLum'],s=2,alpha=0.1,color='grey')
ax[1].scatter(alpaka['OIII_5007_LUM_DERRED'] ,alpaka['wiseLum'],s=2,alpha=0.1,color='grey')

#magellan
ax[0].scatter(magel['OIII_5007_LUM'], magel['wiseLum'],s=5,color='b',marker='x',label='Magellan')
ax[1].scatter(magel['OIII_5007_LUM_DERRED'] ,magel['wiseLum'],s=5,color='b',marker='x')

# snap
ax[0].scatter(alpaka['OIII_5007_LUM'][snap_mask], alpaka['wiseLum'][snap_mask],s=2,alpha=0.5,color='green',label='SNAP')
ax[1].scatter(alpaka['OIII_5007_LUM_DERRED'][snap_mask],alpaka['wiseLum'][snap_mask],s=2,alpha=0.5,color='green')

# j1010
# j1010mask = alpaka['Desig'] == "J1010+1413"
# j1000mask  = alpaka['Desig'] == "J1000+1242"
# [ax[0].scatter(alpaka['OIII_5007_LUM'][jmask], alpaka['wiseLum'][jmask],s=10,alpha=0.5,color='red',marker='x') for jmask in [j1000mask,j1010mask]]

# shared log-log formatting for both panels
[a.set_xscale('log') for a in ax]
[a.set_yscale('log') for a in ax]
[a.set_ylabel("$L_{\\rm{IR}}$") for a in ax]
[a.set_xlabel(lab) for a,lab in zip(ax,['$L_{\\rm{[OIII]}}$','$L_{\\rm{[OIII],dered}}$'])]
[a.set_xlim(1e40,1e44) for a in ax]
[a.set_ylim(3e42,1e46) for a in ax]
ax[0].legend(fontsize=10,loc='lower right')
fig.tight_layout();

# try resampling to correct incomplete lum

In [None]:
# get all type 2 agn in z cut from mul with match in wise
type2 = alpaka[(alpaka['Z'] > 0.14) & (alpaka['Z'] < 0.22) & (alpaka['AGN_TYPE']==2)]
magel_withwise = type2[type2['desig'].isin(magel['desig'])]
# get histograms, using sqrt smaller sample as numbers of bin
lbol_all = np.log10(type2['irbol'])
bin = np.linspace(lbol_all.min(),lbol_all.max(),int(np.ceil(np.sqrt(39))))
hist_ful = plt.hist(lbol_all,label="mullaney type-2 agn \nmatched with WISE",bins=bin)
hist_magel = plt.hist(np.log10(np.concatenate([irbol,irbol_dual])),label='Magellan sample',bins=bin)
plt.yscale('log')
plt.xlabel('Log(L_bol)')
plt.ylabel("number of AGN")
plt.legend();

In [None]:
# interpolate pdf for pretty plotting
binmid = (bin[:-1]+bin[1:])*0.5
pdf_full = CubicSpline(binmid,hist_ful[0])
pdf_magel = CubicSpline(binmid,hist_magel[0])
# use full sample pdf as weight
w = hist_ful[0]/np.sum(hist_ful[0])
# assign weight to each magel target
binnum = np.digitize(np.log10(magel_withwise['irbol']),bin)-1
weights = [w[b] for b in binnum]
magel_withwise['weights'] = weights
# sample magel with weights
new_sample_size = 20
magel_sub = [magel_withwise.sample(n=new_sample_size,weights='weights') for i in range(1000)]
pdfs_magel = [np.histogram(np.log10(magel_sub[i]['irbol']),bins=bin)[0] for i in range(1000)]
# random sample from full sample
pdfs = [np.histogram(np.log10(type2.sample(n=new_sample_size)['irbol']),bins=bin)[0] for i in range(1000)]

In [None]:
# rescale the Magellan counts by the full-sample weights, normalised to the weight of the most populated Magellan bin
maxind = np.argmax(hist_magel[0])
magel_corrected = w/w[maxind]*hist_magel[0]
magel_corrected_spl = CubicSpline(binmid,magel_corrected)

In [None]:
# for pretty plot
x = np.linspace(binmid.min(),binmid.max(),20)
# plot full sample
plt.plot(x,pdf_full(x),c='b')
plt.scatter(binmid,hist_ful[0],c='b',label='full sample')
# plot magellan sample
plt.scatter(binmid,hist_magel[0],label='magel',c='r')
plt.plot(x,pdf_magel(x),c='r')
# correct by largest bin
plt.scatter(binmid,magel_corrected,c="g",label='corrected')
plt.plot(x,magel_corrected_spl(x),c='g')
# plot subsamples
[plt.plot(binmid, pdfs_magel[i],c='r',alpha=0.01) for i in range(500)];
[plt.plot(binmid, pdfs[i],c='b',alpha=0.01) for i in range(500)];

plt.plot(binmid[maxind], magel_corrected[maxind],c="r",marker="*",markersize=20)
plt.ylim(bottom=0.5)
plt.yscale('log')
plt.xlabel('Log(L_bol)')
plt.ylabel("number of AGN")
plt.legend(bbox_to_anchor=(1,1));

### malmquist correction
Tried a volume-limited sample, but our sample is already very bright, so this approach does not work.

In [None]:
from astropy.cosmology import WMAP9 as cosmo
mlim = 22.2 # r-band
sdss = pd.read_pickle("/home/insepien/research-data/alpaka/sdss-cat/sdss_rband_171.pkl")
# NOTE(review): `mul` is not defined in the visible part of the notebook — confirm it exists
# copy so that adding columns does not write into a slice of `sdss`
sdss40 = sdss[sdss['DESIG'].isin(mul['desig'])].copy()
sdss40['r-mag'] = 22.5 - 2.5 * np.log10(sdss40['spectroFlux_r'])
sdss40 = pd.merge(sdss40,mul.rename(columns={'desig':'DESIG'})[['Z','DESIG']],on='DESIG')
# absolute magnitude from the distance modulus, M = m - 5 log10(d_L / 10 pc):
# the modulus uses the luminosity distance, and the ratio must be made
# dimensionless before taking the logarithm
dlum = cosmo.luminosity_distance(sdss40['Z'].values)
sdss40['M'] = sdss40['r-mag']-5*np.log10((dlum/(10*u.pc)).to(u.dimensionless_unscaled).value)

Mlim = -16.5
mask = sdss40['M'] < Mlim
# largest luminosity distance at which an object of magnitude Mlim still reaches mlim
dmax = (10**((mlim-Mlim)/5)*10*u.pc).to(u.Mpc)
from scipy.interpolate import CubicSpline
# invert luminosity distance (Mpc) -> redshift on a grid over 0 <= z <= 1
zgrid = np.linspace(0,1,20)
spl = CubicSpline(cosmo.luminosity_distance(zgrid).value, zgrid)
zmax = spl(dmax.value)
zmax

make table of dual fraction from literature

In [None]:
dfr = pd.read_csv("/home/insepien/research-data/pop-result/lit_rev_frac/dualfrac_rev2.csv")
# keep only rows that report a dual fraction
dfr_agn = dfr[~ dfr['lower dual frac'].isna()]
errs = []
not_nan_ind = dfr_agn['lower dual frac error'].dropna().index.values
nan_ind = dfr_agn.index[~ np.isin(dfr_agn.index,not_nan_ind)]
# Iterate over the index labels of the filtered frame (not 0..n-1) and append
# exactly one entry per row, so that `errs` stays aligned with `dfr_agn`.
for i in dfr_agn.index:
    if i in not_nan_ind:
        try:
            # the error column holds "a,b" bounds; store |fraction - bound| for the sorted bounds
            bounds = sorted(np.array(dfr_agn.loc[i,'lower dual frac error'].split(','),dtype=float))
            errs.append(np.abs(dfr_agn.loc[i,'lower dual frac']-bounds))
        except (AttributeError, ValueError):
            # entry is not a parseable "a,b" string: report it and keep a placeholder
            print(i)
            errs.append(np.array([np.nan,np.nan]))
    else:
        errs.append(np.array([np.nan,np.nan]))
errs = pd.DataFrame(errs)

fmt = lambda minv, maxv: f"${minv:.0f}-{maxv:.0f}$"
fmt2 = lambda minv, maxv: f"${minv:.2f}-{maxv:.2f}$"
# upper error as "+" superscript, lower error as "-" subscript
fmt_err = lambda val, low_err, up_err: f"${val:.3f}^{{+{up_err:.4f}}}_{{-{low_err:.4f}}}$"
fmt_lbol = lambda lo, hi : f"$10^{{{lo:.0f}}}-10^{{{hi:.0f}}}$"


redshift = [fmt2(minvl,maxvl) for minvl,maxvl in zip(dfr_agn['min z'],dfr_agn['max z'])]
sep = [fmt(minvl,maxvl) for minvl,maxvl in zip(dfr_agn['Min sep'],dfr_agn['Max sep'])]
lbols = [fmt_lbol(minvl,maxvl) for minvl,maxvl in zip(dfr_agn['min Lbol'],dfr_agn['max Lbol'])]
fracs = [fmt_err(val,errl,erru) for val,errl,erru in zip(dfr_agn['lower dual frac'],errs[0],errs[1])]

keys = ['Paper Name', 'Red shift', 'Separation $[kpc]$', "$L_{bol}~[erg~s^{-1}]$", "Selection method", "Dual fraction", "Fraction definition"]
# one column specifier per table column (seven columns)
tabb=pd.DataFrame([list(dfr_agn['Paper']),redshift,sep,lbols,list(dfr_agn['selection']),fracs,list(dfr_agn['note'])],index=keys).T.to_latex(column_format='c|c|c|c|c|c|c',index=False)
print(tabb)


fraction 

In [None]:
def volonteri22(plot=False):
    """Turn the digitised cumulative dual fraction of Volonteri+22 into a differential one.

    Reads a two-column CSV ('z', 'logN'), forms three cumulative fractions at
    log(Lbol) = 42, 43, 44 from pairs of consecutive rows, splines them onto a
    fine grid and differentiates. With plot=True both curves are drawn.

    Returns (xfine, fracfine_pdf): the log(Lbol) grid and the gradient of the
    splined cumulative fraction on that grid.
    """
    # load CDF frac at z~0 (tried to get z~0.15 but hard to get accurate values from plot digitizer)
    table = pd.read_csv('/home/insepien/research-data/pop-result/fvL/volonteri22.csv',names=['z','logN'])
    # each fraction is 10**(difference between row `upper` and the row before it)
    log_ratios = [table.loc[upper,'logN']-table.loc[upper-1,'logN'] for upper in (1,3,5)]
    frac_cdf = 10**(np.array(log_ratios))
    lbol_nodes = np.array([42,43,44])
    # spline the cumulative fraction so the gradient is taken on a fine grid
    xfine = np.linspace(42,44,100)
    fracfine_cdf = CubicSpline(lbol_nodes,frac_cdf)(xfine)
    # derivative of the cumulative curve gives the differential one
    fracfine_pdf = np.gradient(fracfine_cdf,xfine)

    if plot:
        fig,(ax_cdf,ax_pdf) = plt.subplots(1,2,figsize=(8,4))
        ax_cdf.plot(xfine,fracfine_cdf,label='Volonteri+22')
        ax_cdf.plot(lbol_nodes,frac_cdf)
        ax_cdf.set_xlabel("Log(Lbol) [erg/s]")
        ax_cdf.set_ylabel("dual frac ($>$Lbol)")
        ax_pdf.plot(xfine,fracfine_pdf)
        ax_pdf.set_xlabel("Log(Lbol) [erg/s]")
        ax_pdf.set_ylabel("dual frac")
        fig.suptitle("Convert dual frac CDF to PDF in Volonteri+22")
        fig.tight_layout()

    return xfine,fracfine_pdf


def fu12(plot=False, data_dir="/home/insepien/research-data/pop-result/fvL/"):
    """Infer a binary fraction per Lbol bin from the Fu+2012 data tables.

    Reads ``fu2012.txt`` (sample table) and ``fu12merger.txt`` (merger table)
    from ``data_dir``. Both are tab-separated and every line carries a trailing
    field that is discarded.

    Parameters
    ----------
    plot : bool
        If True, scatter-plot the fraction against the bin centres.
    data_dir : str
        Directory containing the two text files.

    Returns
    -------
    midbin : numpy.ndarray
        Centres of the Lbol bins.
    binfrac : numpy.ndarray
        Inferred fraction per bin; NaN where a bin contains no merger.
    """
    def _read_tsv(fname):
        # split on tabs and drop the last field of every line
        with open(os.path.join(data_dir, fname), "r") as f:
            return [line.split("\t")[:-1] for line in f]

    # load and clean df: first line holds the column names, data start on the 4th line
    fu = _read_tsv("fu2012.txt")
    dfu = pd.DataFrame(fu[3:],columns=fu[0])
    # rows 0, 3 and 25 are removed by hand (reason not recorded here)
    dfu = dfu.drop(index=[0,3,25])
    # get z<zlim subsample and calculate Lbol = total L_[OIII] * 3500
    zlim = 0.4
    dfz2 = dfu[dfu['z'].astype(float)<zlim].reset_index(drop=True)
    # NOTE(review): the +33 offset presumably converts the tabulated log
    # luminosities to erg/s -- confirm against the table units.
    dfz2['Lbol'] = np.log10(3500*(10**dfz2['L_oiiib'].astype(float) + 10**dfz2['L_oiiir'].astype(float)))+33
    # load merger data for frac cal
    merger = _read_tsv("fu12merger.txt")
    mergerdf = pd.DataFrame(merger[1:],columns=merger[0])
    mergernames = mergerdf['SDSS Name'].values
    mergermask = dfz2['SDSS Name'].isin(mergernames)
    # get counts in Lbol bins: int(sqrt(N)) evenly spaced edges over the Lbol range
    binz = np.linspace(dfz2['Lbol'].min(),dfz2['Lbol'].max(),int(np.sqrt(len(dfz2['Lbol']))))
    count_merger, _ = np.histogram(dfz2['Lbol'][mergermask],bins=binz)
    sebin_mask  = dfz2['SDSS Name'].isin(['124037.8+353437','115106.7+471158*'])
    dpbin_mask = dfz2['SDSS Name'].isin(['095207.6+255257*', '150243.1+111557*'])
    count_dpbin,_ = np.histogram(dfz2['Lbol'][dpbin_mask].values,bins=binz)
    count_sebin,_ = np.histogram(dfz2['Lbol'][sebin_mask].values,bins=binz)
    # NOTE(review): the 29/100 and 1/100 factors are hard-coded scalings,
    # presumably taken from Fu+2012 -- confirm their origin.
    # Bins without mergers give 0/0; keep the NaN result but silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        binfrac = count_dpbin/count_merger * 29/100 *1/100 + count_sebin/count_merger * 29/100
    midbin = binz[:-1]+np.diff(binz)/2
    if plot:
        plt.scatter(midbin, binfrac)
        plt.xlabel("Lbol")
        plt.ylabel(f"binary/ SDSS AGN ($z<{zlim}$)")
        plt.title("Infer fraction from data table in Fu+2012 (dpeak)")
    return midbin, binfrac

def imanishi(data_dir="/home/insepien/research-data/pop-result/fvL/"):
    """Dual fraction per Lbol bin for the combined Imanishi+13 and +20 samples.

    Note that this combines samples from Imanishi+13 and +20 but excludes
    LIRGs, i.e. objects with log L_IR < 12. The histogram uses 4 evenly spaced
    bin edges (3 bins), since the Lbol range is small compared to the number of
    data points.

    Parameters
    ----------
    data_dir : str
        Directory containing ``imanishi2013.txt`` and ``imanishi20.txt``
        (tab-separated; first line holds the column names, data start on the
        4th line).

    Returns
    -------
    imanBinMid : numpy.ndarray
        Centres of the Lbol bins.
    frac : numpy.ndarray
        Number of duals divided by number of objects in each bin (NaN for an
        empty bin).
    """
    def _read_table(fname):
        with open(os.path.join(data_dir, fname), "r") as f:
            rows = [line.strip().split('\t') for line in f]
        return pd.DataFrame(rows[3:], columns=rows[0])

    # open and clean data
    imandf13 = _read_table("imanishi2013.txt")
    imandf20 = _read_table("imanishi20.txt")
    mergeddf = pd.concat([imandf13[['Object','log L_IR']],imandf20[['Object','log L_IR']]])
    ulirg_mask = (mergeddf['log L_IR'].astype(float) >= 12)
    uldf = mergeddf[ulirg_mask]
    # assume Lir/Lbol = 0.12
    # NOTE(review): the +33 offset presumably converts log L_IR to erg/s -- confirm.
    imanLbol = np.log10(10**(uldf['log L_IR'].astype(float).values+33)/0.12)
    # separate 4(2013)+2(2020) duals
    duals_iman = ['Mrk 273', 'Arp 220', '16474', 'NGC 6240', "12072", "12112"]
    dual_lbol = []
    for d in duals_iman:
        # substring match on the object name; rows with a missing name never match
        dual_mask = uldf['Object'].str.contains(d, na=False).to_numpy()
        matches = imanLbol[dual_mask]
        # a dual absent from the ULIRG sample is skipped explicitly instead of
        # being hidden behind a blanket `except`
        if matches.size:
            dual_lbol.append(matches[0])
    # make histogram stats
    imanbins = np.linspace(imanLbol.min(),imanLbol.max(), 4)
    imanBinMid  = imanbins[1:]-np.diff(imanbins)/2
    c_all, _ = np.histogram(imanLbol,bins = imanbins)
    c_dual, _ = np.histogram(dual_lbol,bins=imanbins)
    return imanBinMid, c_dual/c_all

def load_gross(fn, data_dir="/home/insepien/research-data/pop-result/fvL/"):
    """Load a tab-separated text table into a DataFrame of strings.

    The first line of the file provides the column names, the next two lines
    are skipped, and every following line becomes one row. Values are not
    converted, so all columns hold strings.

    Parameters
    ----------
    fn : str
        File name inside ``data_dir``.
    data_dir : str
        Directory that contains the table.

    Returns
    -------
    pandas.DataFrame
    """
    with open(os.path.join(data_dir, fn), "r") as f:
        rows = [line.strip().split('\t') for line in f]
    return pd.DataFrame(rows[3:], columns=rows[0])

def gross23():
    """Dual fraction per Lbol bin for the Gross+23 kinematic-pair sample.

    Combines the 6 GHz radio, [O III], redshift and 1.4 GHz tables (loaded with
    ``load_gross``), estimates one bolometric luminosity per pair as the sum of
    a mechanical term derived from the 1.4 GHz radio luminosity and a radiative
    term of 3500 * L_[OIII], and histograms pairs with z < 0.4.

    Returns
    -------
    bin_mid : numpy.ndarray
        Centres of the Lbol bins produced by ``makebin``.
    frac : numpy.ndarray
        Number of duals divided by number of pairs in each bin.
    """
    # 17 kinematic pairs radio obs, this is technically the final sample, since they use high-res 6Ghz imaging to determine duals
    # paper says there are 6 duals, listed as 12 rows, plus 11 singles, so the final table should have 23 rows. but 1 pair 22522 is not dAGN but listed as 2 rows --> 24 rows
    gradio = load_gross("gross23radio.txt")
    # 21 kinematic radio gal pairs oiii, not observing 4 sources in radio due to low snr
    goiii = load_gross("gross23oiii.txt")
    # 21 kinematic pairs redshift to infer jet lum
    gz = load_gross("gross23z.txt")
    # merge kinematic pairs df (manually changed 1 name in txt file)
    kpairs = gz.merge(goiii, on='Optical ID')
    # make unique name since desig varies among tables: first 5 characters of the ID
    kpairs['name'] = kpairs['Optical ID'].str[:5]
    # remove 3 pairs without full OIII meas
    pair_no_oiii = kpairs.loc[kpairs['L _[O III]'] == 'cdots', 'name']
    kpairs_36_withoiii = kpairs[~kpairs['name'].isin(pair_no_oiii)].reset_index(drop=True)
    # get total L_oiii and average redshift of 18 kinematic pairs with oiii meas.
    # the two members of a pair are assumed to sit on consecutive rows
    loiii_tot = []
    z = []
    uniname = []
    for i in range(0,len(kpairs_36_withoiii),2):
        l_first = float(kpairs_36_withoiii.loc[i,"L _[O III]"])
        l_second = float(kpairs_36_withoiii.loc[i+1,"L _[O III]"])
        loiii_tot.append(np.log10(10**l_first + 10**l_second))
        z.append(kpairs_36_withoiii.loc[i:i+1,"z _spec"].astype(float).sum()/2)
        uniname.append(kpairs_36_withoiii.loc[i,'name'])
    # built from a dict so that z and loiii keep a numeric dtype
    kpairs18 = pd.DataFrame({'name': uniname, 'z': z, 'loiii': loiii_tot})
    # group radio data by unique name and sum within each group
    gradio['name'] = gradio['Radio Designation'].str[:5]
    gradio17 = gradio.groupby('name',as_index=False).sum()
    # merge 6ghz radio, oiii, and z data. 14 objs have all 3
    g23samp = gradio17.merge(kpairs18,on='name')
    # add 1.4ghz data for Lbol cal
    gross14ghz = load_gross('gross23radio14.txt')
    gross14ghz['flx1.4GHz'] = gross14ghz['flx1.4GHz'].astype(float)
    gross14ghz['name'] = gross14ghz['Radio Designation'].str[:5]
    # the redshift already comes from kpairs18; drop the duplicate column before merging
    gross14ghz = gross14ghz.drop(columns='z')
    gross14ghz_uni  = gross14ghz.groupby("name",as_index=False).sum()
    fsamp = g23samp.merge(gross14ghz_uni,on='name')
    # calculate lbol
    # Converting an observed flux density to a luminosity requires the
    # luminosity distance (L = 4 pi d_L^2 S); the angular diameter distance
    # underestimates L by (1+z)^4. No K-correction is applied.
    # NOTE(review): the 1e-3 factor presumably converts mJy to Jy -- confirm.
    flux_14 = fsamp['flx1.4GHz'].values*1e-3*u.Jy
    dist_lum = cosmo.luminosity_distance(fsamp['z'].values.astype(float))
    Lradio = (flux_14*dist_lum**2*4*np.pi)
    Lmech = 43+ 0.7*np.log10((Lradio/ (1e24*u.watt/u.Hz)).to("").value.astype(float))
    Lrad = 3500*10**fsamp['loiii'].values.astype(float)
    fsamp['lbol'] = np.log10((10**Lmech+Lrad).astype(float))
    # separate duals
    gdualnames = ['00511', '22063', '22322', '22442', '23001', '23195']
    gduals = fsamp[fsamp['name'].isin(gdualnames)]
    # make hist
    lowz_lbol = fsamp['lbol'][fsamp['z']<0.4]
    gbin = makebin(lowz_lbol)
    gcount_all, _ = np.histogram(lowz_lbol,bins=gbin)
    gcount_dual, _ = np.histogram(gduals['lbol'][gduals['z']<0.4],bins=gbin)
    print(gcount_all,gcount_dual)
    return gbin[1:]-np.diff(gbin)/2, gcount_dual/gcount_all

def he23(data_dir="/home/insepien/research-data/pop-result/fvL/"):
    """Dual fraction per Lbol bin from the He+23 X-ray table.

    Reads the fixed-width file ``he23.txt`` from ``data_dir``, keeps sources
    with log L(2-10 keV) > 41, merges the two components of each known dual
    into a single system and histograms duals against all systems.

    Parameters
    ----------
    data_dir : str
        Directory containing ``he23.txt``.

    Returns
    -------
    bin_mid : numpy.ndarray
        Centres of the Lbol bins produced by ``makebin``.
    frac : numpy.ndarray
        Number of duals divided by number of systems in each bin.
    """
    # read data, really ugly text file with messy spaces
    with open(os.path.join(data_dir, "he23.txt"),"r") as f:
        lines = f.readlines()
    # extract name and L 2-10 keV from fixed character positions (first line is skipped)
    data = [line.strip("\n") for line in lines[1:]]
    names = [row[:19] for row in data]
    uninames = [row[:6] for row in data]
    log_L2_10 = np.array([row[-32:-27] for row in data],dtype=float)
    # put into df, add Lbol = Lxray * 10 (i.e. +1 in log)
    hedf = pd.DataFrame({'name': names, 'Lxray': log_L2_10, 'uniname': uninames})
    hedf['lbol'] = log_L2_10 + 1
    # get only x-ray detected source in an optical pair that has AGN lum Lxray > 41
    hedfagn = hedf[hedf['Lxray']>41]
    heduals = ['J14144','J09071']
    # drop from a gal pair that has 2 detected xray sources, 1 source that only has upper lim of Lxray. this target is thus not categorized as dual
    hedfagn = hedfagn.drop(index=12)
    # get lbol for duals and singles: each dual contributes the summed
    # luminosity of its own two components (look up by the current name,
    # not always the first entry of the list)
    lbol_dual = []
    for dual_name in heduals:
        lbol_indiv = hedfagn.loc[hedfagn['uniname'] == dual_name, 'lbol'].values
        lbol_dual.append(np.log10(10**lbol_indiv[0]+10**lbol_indiv[1]))
    lbol_single = hedfagn[~ hedfagn['uniname'].isin(heduals)]['lbol'].values
    lbol_all_agn = np.concatenate([lbol_single,lbol_dual])
    # get hist stats
    hebin = makebin(lbol_all_agn)
    hecount_all, _ = np.histogram(lbol_all_agn,bins=hebin)
    hecount_dual, _ = np.histogram(lbol_dual,bins=hebin)
    return hebin[1:]-np.diff(hebin)/2, hecount_dual/hecount_all


def makebin(x):
    """Return evenly spaced bin edges spanning the range of ``x``.

    The edges run from ``x.min()`` to ``x.max()`` and their number is the
    integer part of ``sqrt(len(x))``.
    """
    n_edges = int(np.sqrt(len(x)))
    lowest, highest = x.min(), x.max()
    return np.linspace(lowest, highest, n_edges)

In [None]:
def plotpaper(ax,fn,markertype,clr,lbl):
    """Plot digitized literature fractions with symmetric error bars on ``ax``.

    The CSV ``fn`` has two header-less columns (read as 'lbol' and 'f'). After
    sorting by 'lbol' and then 'f', every third row starting from row 1 is taken
    as a data point, and the error is the absolute difference between that row
    and the row just before it; the same error is used above and below.
    """
    table = pd.read_csv('/home/insepien/research-data/pop-result/fvL/'+fn, names=['lbol','f'])
    # sort by lbol, then frac, so each group of rows is ordered from low to high
    table = table.sort_values(by=['lbol','f']).reset_index(drop=True)
    # rows 1, 4, 7, ... are the data points; rows 0, 3, 6, ... sit just below them
    centre = table.iloc[1::3]
    below = table.iloc[0::3].iloc[:len(centre)]
    x = list(centre['lbol'])
    f = centre['f'].to_numpy()
    err = np.abs(f - below['f'].to_numpy())
    ax.errorbar(x,f,yerr=[err,err],linestyle='',fmt=markertype,c=clr,label=lbl)

# literature
all_markers = ["o","s","x","d",'p',"H"]
all_colors = sns.color_palette("colorblind", len(all_markers))
fns = ['ford2015.csv','koss12bat.csv','koss12sdss.csv','barrows2017.csv','barrows2023.csv']
# Backslashes meant for LaTeX are doubled so that they are not parsed as
# (invalid) Python escape sequences; "\n" stays a real line break.
labels = ['Ford\\&Greene+15\n(spectrally offset, $z<0.21$)',
          'Koss+12\n(X-ray, $z<0.05$)', 'Koss+12\n(Optical, $z<0.07$)',
          'Barrows+17\n(optical, spatially offset, $0.025<z<0.2$)',
          'Barrows+23\n(use photo-z PDF to find WISE duals, $z<0.08$)']

fig,ax = plt.subplots()
# ready-plots: one digitized data set per file, each with its own colour and marker
for fn, lbl, clr, mrk in zip(fns, labels, all_colors, all_markers):
    plotpaper(ax, fn, lbl=lbl, clr=clr, markertype=mrk)


# inferred fracs
# volonteri+22
xfine,fracfine_pdf = volonteri22()
ax.plot(xfine,fracfine_pdf, label='Volonteri+22\n(simulation, z=0)',c='green',linestyle='--')
# fu+2012
xfu, fracfu = fu12()
ax.plot(xfu,fracfu, label='Fu+12\n(double-peak IFS, $z<0.4$)',c='steelblue',linestyle='-.')
# imanishi+20 - ULIRG
# NOTE(review): all_colors[4] is also the colour of Barrows+23 above -- confirm this is intended.
xIman, fracIman = imanishi()
ax.plot(xIman,fracIman, label='Imanishi+13,+20\n(ULIRG, $z<0.2$)',c=all_colors[4],linestyle='--')
# gross+23
xgross, fracgross = gross23()
ax.scatter(xgross,fracgross, label='Gross+23\n(radio, dual/pair, very small sample (4/8), $z<0.4$)',c='r',marker='x')
# he+23
xhe, frache = he23()
ax.scatter(xhe,frache, label='He+23\n(optical+xray, dual/gal. pair hosting $\\geq 1$ Chandra Xray AGN,\n$z<0.2$)',c=all_colors[5],marker='x')

# Magellan
unresolved_duals = alpaka[alpaka['Desig'].isin(['J0932+1611',"J0918+1207"])]
all_dual_cand = pd.concat([unresolved_duals, duals])
# the bin edges come from the full sample and are reused for the dual candidates
count_all,binz = np.histogram(np.log10(magel['irbol']).dropna())
count_duals,_ = np.histogram(np.log10(all_dual_cand['irbol'].dropna()),bins=binz)
binmid = (binz[1:]+binz[:-1])/2
ax.scatter(binmid,count_duals/count_all,c='k',marker="*",label='Our fraction\n(counting unresolved candidates)',s=50)

# cosmetics
ax.set_yscale('log')
ax.legend(bbox_to_anchor=(1,1));