In [1]:
from scipy import interpolate
import os
import numpy
from scipy.stats import linregress
import pandas as pd
import sys
sys.path.append(f'{os.environ["HOME"]}/Projects/planckClusters/catalogs')
from load_catalogs import load_PSZcatalog
from astropy.io import ascii


In [2]:
def calc_completeness_model(field, filter, data_path=None):
    ''' Estimate the 80% completeness magnitude of a field from number counts.

    The MAG_AUTO values of one catalog are histogrammed in 0.5 mag bins
    between 15 and 30. A straight line is fitted to log10(counts) over the
    (up to) five bins just brighter than the peak bin, and the observed
    counts are divided by that model. The returned value is the faintest
    magnitude, on a 0.1 mag grid, where the ratio is still above 0.8.

    Parameters
    ----------
    field : str
        Field name; used both as the sub-directory and as the file prefix.
    filter : str
        Band label inserted into the file name (``{field}{filter}_cal.cat``).
        The name shadows the builtin but is kept for caller compatibility.
    data_path : str, optional
        Directory that contains the per-field sub-directories. Defaults to
        ``'../data/proc2'``.

    Returns
    -------
    float or int
        The completeness magnitude, or ``0`` when the catalog file does not
        exist or when no limit can be derived (fewer than two populated bins
        available for the fit, or the ratio never exceeds 0.8).
    '''

    bins = numpy.arange(15, 30, 0.5)
    centers = (bins[:-1] + bins[1:]) / 2

    # Fall back to the default location. This must assign ``data_path``
    # itself, otherwise the path below starts with the literal "None/".
    if not data_path:
        data_path = '../data/proc2'

    cat_path = f'{data_path}/{field}/{field}{filter}_cal.cat'
    # A missing catalog is an expected situation (not every field has every
    # band), so it is reported with the sentinel value 0.
    if not os.path.isfile(cat_path):
        return 0
    try:
        cat = ascii.read(cat_path)
    except FileNotFoundError:
        return 0
    cat = cat.to_pandas()
    # Drop entries with MAG_AUTO >= 40 (presumably sentinel values for failed
    # measurements -- confirm) and entries with CLASS_STAR >= 0.8 (presumably
    # star-like sources -- confirm).
    cat = cat.loc[cat.MAG_AUTO < 40]
    cat = cat.loc[cat.CLASS_STAR < 0.8]

    # number counts per magnitude bin
    n, _ = numpy.histogram(cat['MAG_AUTO'], bins=bins)

    # work in log space; empty bins become -inf, which is handled below
    with numpy.errstate(divide='ignore'):
        logn = numpy.log10(n)

    # find the peak
    peak = numpy.argmax(logn)

    # Fit the (up to) five bins just brighter than the peak. The start is
    # clamped so a peak near the bright end cannot wrap the slice around,
    # and empty bins (-inf) are excluded so the fit stays finite.
    start = max(peak - 5, 0)
    fit_x = centers[start:peak]
    fit_y = logn[start:peak]
    usable = numpy.isfinite(fit_y)
    if usable.sum() < 2:
        return 0
    model = linregress(fit_x[usable], fit_y[usable])

    # convert the linear model in lin-log space to log in linear space
    #  and figure out where 80% completeness is
    # see https://en.wikipedia.org/wiki/Semi-log_plot
    ratio = n / (10**model.intercept * 10**(centers * model.slope))

    # Interpolate the observed/model ratio so it can be sampled on a finer
    # magnitude grid than the histogram bins.
    func = interpolate.interp1d(centers, ratio)
    mags = numpy.arange(centers[0], centers[-1], 0.1)
    magdiff = 0.8 - func(mags)

    # Find the last grid point where the difference is negative, i.e. the
    # faintest magnitude at which the observed counts are still more than
    # 80% of the model counts.
    above = numpy.where(magdiff < 0)[0]
    if above.size == 0:
        return 0
    mag_idx = above[-1]

    print(field, f'{mags[mag_idx]:.3f}')

    return mags[mag_idx]

In [None]:
# Load the PSZ catalog with the project helper's default options. The result
# is used below as a pandas DataFrame (via .index and .iterrows()).
data = load_PSZcatalog()

In [None]:
# Spot-check one field in every band. Collecting the results in a dict makes
# the cell display all five values instead of only the last call's result.
check_field = 'PSZ2_G305.76+44.79'
{band: calc_completeness_model(check_field, band) for band in ['g', 'r', 'i', 'z', 'K']}

In [None]:
datapath = '../data/proc2'
bands = ['g', 'r', 'i', 'z', 'K']

# One row per catalog entry, one column per band; 0.0 means "no measurement".
completeness = pd.DataFrame(0.0, index=data.index, columns=bands)

for idx, row in data.iterrows():
    # Directory names use underscores where the catalog names have spaces.
    name = row.NAME.replace(' ', '_')
    if os.path.isdir(f'{datapath}/{name}'):
        name_us = name
    else:
        # Fall back to the PSZ1 name. NAME_PSZ1 is presumably NaN (a float,
        # hence the AttributeError) when there is no PSZ1 counterpart.
        try:
            name_psz1 = row.NAME_PSZ1.replace(' ', '_')
        except AttributeError:
            continue
        if not os.path.isdir(f'{datapath}/{name_psz1}'):
            continue
        name_us = name_psz1

    for band in bands:
        # Write with a single .loc call keyed on the index label from
        # iterrows(); chained ``.iloc[i][band] = ...`` assigns into a
        # temporary copy and treats the label as a position.
        # Assumes the index labels of `data` are unique.
        completeness.loc[idx, band] = calc_completeness_model(
            name_us, band, data_path=datapath)

In [None]:
# Summary statistics of the per-band completeness table.
completeness.describe()

In [None]:
# Add the catalog NAME column; both frames share the same index, so the
# values line up row by row. Note that this mutates `completeness` in place.
completeness['NAME'] = data['NAME']

In [None]:
# Write the table to completenesses.csv, relative to the current working
# directory.
completeness.to_csv('completenesses.csv')

In [3]:
# Load the catalog again with the unconf, extras and us options enabled.
# NOTE(review): the displayed result has g/r/i/z/K columns, so the loader
# presumably merges in the saved completeness values -- confirm in
# load_catalogs.
data2 = load_PSZcatalog(unconf=True, extras=True, us=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df.iloc[i]['mosaic'] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df.iloc[i]['newfirm'] = True


In [4]:
# Display the loaded catalog for inspection.
data2

Unnamed: 0.1,INDEX_PSZ1,NAME_PSZ1,RA_PSZ1,DEC_PSZ1,REDSHIFT_PSZ1,SNR_PSZ1,INDEX_PSZ2,NAME_PSZ2,RA_PSZ2,DEC_PSZ2,...,DEC,SNR,mosaic,newfirm,Unnamed: 0,g,r,i,z,K
0,4.0,PSZ1 G001.00+25.71,244.584113,-13.070074,,6.041658,,,,,...,-13.070074,6.041658,False,False,3.0,0.00,0.00,0.00,0.00,0.00
1,8.0,PSZ1 G003.09-22.51,292.164402,-35.711064,,4.924522,,,,,...,-35.711064,4.924522,False,False,7.0,0.00,0.00,0.00,0.00,0.00
2,9.0,PSZ1 G003.60-24.45,294.612885,-35.839243,,5.761011,,,,,...,-35.839243,5.761011,False,False,8.0,0.00,0.00,0.00,0.00,0.00
3,29.0,PSZ1 G011.35-72.94,354.071937,-32.134864,,5.180040,,,,,...,-32.134864,5.180040,False,False,,,,,,
4,30.0,PSZ1 G011.50-40.11,314.933484,-32.848535,,4.723595,,,,,...,-32.848535,4.723595,False,False,29.0,0.00,0.00,0.00,0.00,0.00
5,32.0,PSZ1 G012.48+27.36,249.372932,-3.818622,,4.540755,,,,,...,-3.818622,4.540755,False,False,31.0,0.00,0.00,0.00,0.00,0.00
6,34.0,PSZ1 G012.66+25.79,250.775084,-4.541822,,4.500723,,,,,...,-4.541822,4.500723,False,False,33.0,0.00,0.00,0.00,0.00,0.00
7,37.0,PSZ1 G014.76-62.53,341.675050,-32.178625,,4.579976,48.0,PSZ2 G014.72-62.49,341.633440,-32.201116,...,-32.201116,5.213110,False,False,36.0,0.00,0.00,0.00,0.00,0.00
8,38.0,PSZ1 G015.42+58.42,223.890110,14.377075,,4.652795,,,,,...,14.377075,4.652795,False,False,37.0,0.00,0.00,0.00,0.00,0.00
9,40.0,PSZ1 G017.05-22.67,297.268922,-23.606344,,4.886622,,,,,...,-23.606344,4.886622,False,False,39.0,0.00,0.00,0.00,0.00,0.00


In [5]:
# Keep only the name, position and g/r/K columns.
# NOTE(review): this is a column selection of data2; take a .copy() first if
# values are going to be assigned into df.
df = data2[['NAME', 'RA', 'DEC', 'g', 'r', 'K']]

In [6]:
# Display the trimmed table.
df

Unnamed: 0,NAME,RA,DEC,g,r,K
0,PSZ1 G001.00+25.71,244.584113,-13.070074,0.00,0.00,0.00
1,PSZ1 G003.09-22.51,292.164402,-35.711064,0.00,0.00,0.00
2,PSZ1 G003.60-24.45,294.612885,-35.839243,0.00,0.00,0.00
3,PSZ1 G011.35-72.94,354.071937,-32.134864,,,
4,PSZ1 G011.50-40.11,314.933484,-32.848535,0.00,0.00,0.00
5,PSZ1 G012.48+27.36,249.372932,-3.818622,0.00,0.00,0.00
6,PSZ1 G012.66+25.79,250.775084,-4.541822,0.00,0.00,0.00
7,PSZ2 G014.72-62.49,341.633440,-32.201116,0.00,0.00,0.00
8,PSZ1 G015.42+58.42,223.890110,14.377075,0.00,0.00,0.00
9,PSZ1 G017.05-22.67,297.268922,-23.606344,0.00,0.00,0.00
