In [1]:
import splat
import wisps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os.path import expanduser
homedir = expanduser("~")
from astropy.io import ascii
%matplotlib inline
import ast

In [2]:
# Load the WISP and HST-3D photometry catalogs from the wisps output directory
photometry_dir = wisps.OUTPUT_FILES
w_phot = pd.read_csv(photometry_dir + '/wisp_photometry.csv')
hst_phot = pd.read_csv(photometry_dir + '/hst3d_photometry_all.csv')

In [3]:
#

In [4]:
# Lower-case every WISP grism id (the column is overwritten on w_phot itself)
w_phot['grism_id'] = w_phot['grism_id'].map(lambda gid: gid.lower())

In [5]:
# Read the tab-separated index table; column names are supplied explicitly,
# so the first line of the file is read as data rather than as a header
cols = ['name', 'snr', 'spex_chi', 'line_chi', 'spt', 'indices']
ids = pd.read_csv(homedir + '/wisp_indices.txt', sep='\t', header=None, names=cols)

In [6]:
def strip_diction(s):
    """Parse a dictionary literal stored as text into a pandas Series.

    ``nan``, ``-inf`` and ``inf`` are not valid Python literals, so every
    occurrence of those substrings is first rewritten to the quoted string
    ``'nan'``. The cleaned text is then evaluated with ``ast.literal_eval``
    and the resulting mapping is wrapped in a Series (keys become the index).
    """
    text = s.strip()
    # '-inf' has to be rewritten before the bare 'inf', otherwise the sign is left behind
    for token in ('nan', '-inf', 'inf'):
        text = text.replace(token, "'nan'")
    return pd.Series(ast.literal_eval(text))

In [7]:
def reformat_index_table(df):
    """Expand the dictionary-valued text columns of ``df`` into DataFrames.

    A column is treated as dictionary-valued when its first entry is a string
    that, once stripped, starts with ``{``. Each such column is expanded with
    ``strip_diction`` (one output column per dictionary key). For the column
    named ``indices`` every resulting cell is additionally passed to ``tuple``.
    Every column name is printed as it is visited.

    Returns
    -------
    (snr_list, ids_list) : tuple of lists of DataFrame
        ``ids_list`` holds the expansion of the ``indices`` column;
        ``snr_list`` holds the expansions of all other dictionary columns.
    """
    snr_list = []
    ids_list = []
    for column in df.columns:
        head = df[column].iloc[0]
        print (column)
        # only the first entry decides whether the column holds dictionaries
        if not (isinstance(head, str) and head.strip().startswith("{")):
            continue
        expanded = df[column].apply(strip_diction)
        if column == 'indices':
            ids_list.append(expanded.applymap(tuple))
        else:
            snr_list.append(expanded)

    return snr_list, ids_list

In [8]:
def replace(x):
    """Return ``(nan, nan)`` for a NaN float and any other value unchanged.

    Parameters
    ----------
    x : object
        A single table cell.

    Returns
    -------
    object
        ``(np.nan, np.nan)`` when ``x`` is a float that is NaN, otherwise ``x``
        itself. Finite floats are passed through untouched; previously they
        fell out of the function and were turned into ``None``.
    """
    if isinstance(x, float) and np.isnan(x):
        return (np.nan, np.nan)
    return x

In [9]:
# Rebind `cols` (previously the raw file's column names) to the index names defined by wisps
cols = [*wisps.INDEX_NAMES]

In [10]:
# Expand the dictionary columns of the raw table
snr_list, ids_list = reformat_index_table(ids)
# NOTE(review): pd.concat stacks its inputs row-wise by default, so this presumably
# relies on each list holding a single frame — confirm
snr_table = pd.concat(snr_list)
index_table = pd.concat(ids_list)
# the parser emits the string 'nan' for missing values; turn those into real NaNs
fmtids = snr_table.join(index_table).replace('nan', np.nan)
# run `replace` over every cell of the index columns
fmtids[cols] = fmtids[cols].applymap(replace)
len(fmtids), len(ids)

name
snr
spex_chi
line_chi
spt
indices


(430413, 430413)

In [11]:
# `indices` is a second name for the same object as `fmtids`; the scalar
# columns of the raw table are copied onto it
meta_cols = ['name', 'spex_chi', 'line_chi', 'spt']
indices = fmtids
indices[meta_cols] = ids[meta_cols]

In [12]:
#hst_phot.grism_id

In [13]:
from scipy import stats
def f_test(x):
    """
    Evaluate the F-distribution probability density at ``x``.

    The distribution is fixed to 2 and 1 degrees of freedom, location 0 and
    scale 1. ``x`` is handed straight to ``scipy.stats.f.pdf``.
    """
    dfn, dfd = 2, 1
    return stats.f.pdf(x, dfn, dfd, loc=0, scale=1)

In [14]:
def combined_wisp_hst_catalogs(hst3d_phot,wisp_phot, indices):
    """
    Combine the HST-3D and WISP photometry with the spectral-index table.

    The two photometry catalogs are stacked (HST-3D rows first, then WISP rows)
    into a common layout with columns ``grism_id``, ``F110``, ``F140``,
    ``F160`` (each a ``(magnitude, error)`` tuple), ``RA``, ``DEC``, ``survey``
    and ``flags``. Duplicate grism ids are dropped from both tables and the
    result is joined to ``indices`` on ``grism_id`` with ``pd.merge`` (inner
    join by default, so ids missing from either side are dropped).

    Parameters
    ----------
    hst3d_phot : pandas.DataFrame
        HST-3D photometry. Columns read: ``grism_id``, ``F140_mag``,
        ``F140_mag_er``, ``F160_mag``, ``F160_mag_er``, ``ra_x``, ``dec_x``,
        ``use_phot_x``, ``f_cover``, ``f_flagged``, ``f_negative``.
    wisp_phot : pandas.DataFrame
        WISP photometry. Columns read: ``grism_id``, ``NIMCOS_110W``,
        ``NIMCOS_140W``, ``NIMCOS_160W`` and their ``_ER`` counterparts,
        ``RA``, ``DEC``, ``EXTRACTION_FLAG``.
    indices : pandas.DataFrame
        Index table. Columns read: ``name``, ``spex_chi``, ``line_chi``.

    Returns
    -------
    pandas.DataFrame
        The merged table with two extra columns: ``x`` (``spex_chi`` divided
        by ``line_chi``) and ``f_test`` (``f_test`` evaluated on ``x``).

    Notes
    -----
    None of the input frames is modified. Two diagnostic lines are printed:
    the columns of the combined photometry right after the ids are stacked,
    and the shapes of the merged, photometry and index tables.
    """
    def _stack(hst_values, wisp_values):
        # the order matters: HST-3D first, then WISP; a fresh 0..N-1 index keeps
        # every stacked column aligned with the others
        return pd.concat([hst_values, wisp_values], ignore_index=True)

    # work on new frames so the caller's catalogs are left untouched
    hst3d_phot = hst3d_phot.copy()
    wisp_phot = wisp_phot.rename(columns={'EXTRACTION_FLAG': 'flags'})

    #hst_3d does not have 110 photometry
    hst3d_phot['F110_mag'] = np.nan
    hst3d_phot['F110_mag_er'] = np.nan

    #combine flags into one flag: one array of the four flag values per row,
    #indexed like the frame so the assignment cannot misalign
    flgs = hst3d_phot[['use_phot_x', 'f_cover', 'f_flagged', 'f_negative']].values
    hst3d_phot['flags'] = pd.Series(list(flgs), index=hst3d_phot.index)

    hst3d_phot['survey'] = 'HST3D'
    wisp_phot['survey'] = 'WISP'

    #rename some columns, drop the '.1D.ascii' suffix and surrounding white space
    indices = indices.rename(columns={'name': 'grism_id'})
    indices['grism_id'] = indices['grism_id'].apply(lambda x: x.split('.1D.ascii')[0].strip())

    #combined photometry
    comb_phot = pd.DataFrame({'grism_id': _stack(hst3d_phot['grism_id'], wisp_phot['grism_id'])})

    print(comb_phot.columns)
    for flt in ['110', '140', '160']:
        hst_mags = hst3d_phot[['F'+flt+'_mag', 'F'+flt+'_mag_er']].apply(tuple, axis=1)
        wisp_mags = wisp_phot[['NIMCOS_'+flt+'W', 'NIMCOS_'+flt+'W_ER']].apply(tuple, axis=1)
        comb_phot['F'+flt] = _stack(hst_mags, wisp_mags)

    comb_phot['RA'] = _stack(hst3d_phot['ra_x'], wisp_phot['RA'])
    comb_phot['DEC'] = _stack(hst3d_phot['dec_x'], wisp_phot['DEC'])
    comb_phot['survey'] = _stack(hst3d_phot['survey'], wisp_phot['survey'])
    comb_phot['flags'] = _stack(hst3d_phot['flags'], wisp_phot['flags'])

    #strip white spaces from grism_ids #the combination might pose problems
    comb_phot['grism_id'] = comb_phot['grism_id'].apply(lambda x: x.strip())

    indices = indices.drop_duplicates(subset='grism_id')
    comb_phot = comb_phot.drop_duplicates(subset='grism_id')
    master_table = pd.merge(indices, comb_phot, on='grism_id', validate='one_to_one')

    # I probably lost tons of objects with grism id ='0000'
    print (master_table.shape, comb_phot.shape, indices.shape)

    # cast to float BEFORE looking for zeros, so zeros stored as text are caught too
    chi_cols = ['spex_chi', 'line_chi']
    master_table[chi_cols] = master_table[chi_cols].astype(float)
    #replace zeros with something close to zero to avoid dividing by zero
    master_table.loc[master_table['line_chi'] == 0.0, 'line_chi'] = 1e-40

    master_table['x'] = master_table['spex_chi'] / master_table['line_chi']
    # f_test is evaluated on the whole column at once (as cell 24 already does)
    master_table['f_test'] = f_test(master_table['x'].values)

    return master_table

In [15]:
# Build the merged photometry + index table
mdf = combined_wisp_hst_catalogs(hst3d_phot=hst_phot, wisp_phot=w_phot, indices=indices)

Index(['grism_id'], dtype='object')
(270436, 26) (279595, 8) (430413, 19)


In [16]:
# Observation log: the first row of each POINTING is kept
obs_log_path = wisps.OUTPUT_FILES + '/observation_log.csv'
obs = (
    pd.read_csv(obs_log_path)
    .drop(columns=['Unnamed: 0'])
    .drop_duplicates(subset='POINTING')
    .reindex()  # called without arguments, so the index labels stay as they are
)

In [17]:
# Lower-case the column names of the observation log
obs.columns = obs.columns.str.lower()

In [18]:
def get_pointing_name(grism_id):
    """Return the pointing part of a grism id.

    Ids that start with ``par`` (case-insensitive) are cut at the first ``-``;
    every other id is cut at the first ``-G``. When the separator does not
    occur, the id is returned whole.
    """
    separator = '-' if grism_id.lower().startswith('par') else '-G'
    return grism_id.partition(separator)[0]

def add_pointing_information(row):
    """Return ``row`` extended with its pointing name and exposure.

    The pointing is derived from ``row.grism_id`` with ``get_pointing_name``.
    The exposure is the first ``'exposure (s)'`` entry of the module-level
    ``obs`` table whose ``pointing`` equals that name.

    Parameters
    ----------
    row : pandas.Series
        One table row; must provide ``grism_id``.

    Returns
    -------
    pandas.Series
        The original entries followed by ``pointing`` and ``exposure``.

    Raises
    ------
    IndexError
        If no row of ``obs`` matches the pointing.
    """
    pntg = get_pointing_name(row.grism_id)
    matches = obs['exposure (s)'][obs.pointing.isin([pntg])]
    extra = pd.Series({'pointing': pntg, 'exposure': matches.iloc[0]})
    # pd.concat replaces Series.append, which no longer exists in pandas >= 2.0
    return pd.concat([row, extra])

In [19]:
#

In [20]:
# Attach pointing name and exposure to every row (row-wise apply on a freshly indexed table)
mdf_reindexed = mdf.reset_index(drop=True)
mt = mdf_reindexed.apply(add_pointing_information, axis=1)

In [21]:
# Store the full merged table under the 'all_phot_spec_data' key
mt.to_hdf(path_or_buf=wisps.COMBINED_PHOTO_SPECTRO_FILE, key='all_phot_spec_data')

In [22]:
# Number of rows in the merged table that was just written out
len(mt)

270436

In [23]:
# Number of rows in the raw index table, for comparison with the merged table
len(ids)

430413

In [24]:
# Keep the rows with snr2 above 5. The explicit .copy() makes `idsonly` an
# independent frame, so adding the 'f' column below does not write into a
# filtered slice of `indices` (which triggers SettingWithCopyWarning).
idsonly = indices[indices.snr2 > 5.0].copy()
# 'f' is f_test evaluated on the ratio spex_chi / line_chi (both cast to float first)
idsonly['f'] = f_test(idsonly.spex_chi.apply(float)/idsonly.line_chi.apply(float))

In [25]:
#wisps.Spectrum(filepath=wisps.REMOTE_FOLDER+'/aegis/aegis-01/1D/ASCII/aegis-01-G141_14735.1D.ascii')

In [26]:
# Store the rows with f above 0.6 under the 'indices_only' key; `name` is saved as `Names`
f_selected = idsonly[idsonly.f > 0.6].rename(columns={'name': 'Names'})
f_selected.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='indices_only')

In [27]:
# Names that are exactly 'par' (isin compares whole values, not prefixes)
ids.loc[ids['name'].isin(['par']), 'name']

Series([], Name: name, dtype: object)

In [28]:
# Column names of the raw index table
ids.columns

Index(['name', 'snr', 'spex_chi', 'line_chi', 'spt', 'indices'], dtype='object')

In [33]:
#mt[mt.grism_id.str.contains('good')]

Unnamed: 0,snr1,snr2,cdf_snr,snr3,snr4,H_2O-1/J-Cont,H_2O-2/H_2O-1,H-cont/H_2O-1,CH_4/H_2O-1,H_2O-2/J-Cont,...,F140,F160,RA,DEC,survey,flags,x,f_test,pointing,exposure
239404,4.186224,4.423259e+00,118.220757,,,"(1.3487317710126276, 0.6155685411502528)","(0.8027776331567207, 0.27513652636355956)","(0.8639420948735189, 0.30705528098752666)","(0.9040639215787958, 0.29832379211365045)","(1.0827316988967894, 2.898734317321559)",...,"(23.613928538194862, 0.026903580694657557)","(23.564342925732433, 0.02942108208955224)",189.112228,62.221630,HST3D,"[1.0, 1.0, 0.02, 0.18]",1.181875,0.162093,goodsn-23,812.0
239405,4.163887,3.655791e+00,396.820575,,,"(1.129106526384418, 0.6040080990604727)","(0.8036680425798302, 0.3775235418167866)","(0.7337192844457326, 2.535059637810781)","(0.7291438396279665, 0.8617649112305648)","(0.9074268319234765, 0.47550006471176776)",...,"(24.135634741534577, 0.03687796021471424)","(23.967799103837567, 0.0376540077294686)",189.138046,62.198376,HST3D,"[1.0, 1.0, 0.03, 0.14]",0.935690,0.205525,goodsn-23,812.0
239406,2.885767,3.010567e+00,90.712954,,,"(0.9519395442561297, 1.8541261467036294)","(1.1122523747300601, 15.099899468485553)","(1.2061194078565436, 11.131061217404408)","(1.097202831623782, 7.579902707405721)","(1.0587970186983315, 1.7389004818891372)",...,"(25.57572608203696, 0.12971507349817316)","(25.684139803209163, 0.1496359923384598)",189.131027,62.213993,HST3D,"[1.0, 1.0, 0.03, 0.5]",1.096396,0.175285,goodsn-23,812.0
239407,10.737043,9.108163e+00,544.043756,,,"(1.0395782123904909, 0.1393280081557806)","(0.9282124166505249, 0.1282112772462605)","(0.694140476983833, 0.10601956055507807)","(0.6117754114637939, 0.11160039458336045)","(0.9649494048202102, 0.13067616034372875)",...,"(21.469016650027218, 0.008939096958910469)","(21.34974836365289, 0.0104294952681388)",189.140594,62.199032,HST3D,"[1.0, 1.0, 0.02, 0.0]",1.053088,0.182667,goodsn-23,812.0
239408,2.430992,2.493035e+00,257.722755,,,"(1.036207712900216, 1.8597749519732436)","(1.017050838968096, 247.19691461417614)","(1.0431807358820597, 6.46432876750622)","(0.9589366280698004, 44.3987028036647)","(1.0538759237503765, 4.83427584335971)",...,"(25.82838303050336, 0.16797395341854682)","(25.677169053881453, 0.13166323979401448)",189.118286,62.225842,HST3D,"[1.0, 1.0, 0.02, 0.48]",0.914744,0.210106,goodsn-23,812.0
239409,2.882270,2.831770e+00,102.802798,,,"(1.5743952796589344, 910.2208400044967)","(0.7670745708522739, 0.5408723977178971)","(0.6574798169239479, 0.6278928127230761)","(0.5878402635251129, 0.6692580638879699)","(1.207678583496223, 28.415934916063406)",...,"(24.94114869611286, 0.07690786681822487)","(24.882787022905728, 0.07776255385996407)",189.119797,62.212620,HST3D,"[1.0, 1.0, 0.03, 0.42]",0.854497,0.224279,goodsn-23,812.0
239410,0.000014,4.095590e+00,104.690976,,,"(0.05382177733791264, 27.96577090353304)","(7.55324231592541, 19.712723118779397)","(80.78155036653813, 0.0032080452778829685)","(77.73464475596963, 0.0026028287166045146)","(0.406528926107037, 39.8047335802974)",...,"(nan, 1.085)","(24.82407402408249, 0.10682069053490942)",189.181580,62.213627,HST3D,"[1.0, 0.17, 0.1, 0.44]",0.385530,0.424278,goodsn-23,812.0
239411,2.747861,3.166364e+00,206.241923,,,"(0.9522840363281755, 1.1988494732883426)","(1.2315702442291012, 8.205374701300093)","(1.4381461589132905, 23.999776269431173)","(1.3119369446070945, 17.872899015800627)","(1.1728046831961656, 4.441751497184532)",...,"(25.324312200930265, 0.1041718771064197)","(25.536853416605748, 0.13721451877356947)",189.138809,62.200848,HST3D,"[1.0, 1.0, 0.03, 0.52]",1.032962,0.186276,goodsn-23,812.0
239412,0.000007,7.578991e-06,117.706290,,,"(0.6040947964179623, 13.727450122240766)","(6.270427300962994, 0.0010600911233042615)","(2.642999630280224, 0.0011067015710005087)","(2.901622291546765, 0.00018847910828660692)","(3.7879325038288725, 0.0007459247328103165)",...,"(25.376313261774527, 0.11067739608819244)","(25.558156263204808, 0.0859255413426971)",189.185242,62.220299,HST3D,"[1.0, 0.49, 0.06, 0.4]",0.655493,0.284645,goodsn-23,812.0
239413,1.912036,1.704120e+00,198.076046,,,"(1.6433964283079423, 12.970144594902688)","(0.7030325061625831, 9.675304468590559)","(0.607599676130746, 51.063359582127916)","(0.7161918817194769, 4.4775272900727705)","(1.1553611096119705, 13.578485951274237)",...,"(24.245418254539015, 0.05940777062434496)","(24.243685667598147, 0.07165033135681897)",189.108063,62.211742,HST3D,"[1.0, 1.0, 0.02, 0.33]",0.675734,0.277327,goodsn-23,812.0


In [30]:
#hst_phot[hst_phot.grism_id.str.contains('aegis')]

In [31]:
# NOTE(review): `d` is not assigned in any cell visible in this notebook, and the
# recorded output of this cell is a NameError. It must be defined (an object
# exposing col1, col2 and col3) before this cell can run — TODO confirm its source.
# Plots col2 and col3 against col1 on the same axes.
plt.plot(d.col1, d.col2)
plt.plot(d.col1, d.col3)

NameError: name 'd' is not defined