In [1]:
import splat
import wisps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os.path import expanduser
homedir = expanduser("~")
from astropy.io import ascii
%matplotlib inline
import ast

In [2]:
# Load the two photometry catalogs (WISP and HST-3D) from the CSV files that
# live under wisps.OUTPUT_FILES.
w_phot=pd.read_csv(wisps.OUTPUT_FILES+'/wisp_photometry.csv')
hst_phot=pd.read_csv(wisps.OUTPUT_FILES+'/hst3d_photometry_all.csv')

In [3]:
#

In [4]:
# Lower-case the WISP grism ids (only the case of the values changes; the
# column itself is not renamed).
# Assumes every grism_id is a string: a missing value (float NaN) has no
# .lower() and would raise AttributeError here.
w_phot['grism_id']=w_phot['grism_id'].apply(lambda x: x.lower())

In [5]:
# Read the tab-delimited index table from the user's home directory.
# Column names are passed explicitly through `names`, so the first line of the
# file is read as data rather than as a header.
# NOTE: `cols` is rebound to a different list in a later cell.
cols=['name', 'snr', 'spex_chi', 'line_chi', 'spt', 'indices']
ids=pd.read_csv(homedir+'/wisp_indices.txt', names =cols, delimiter ='\t')

In [6]:
def strip_diction(s):
    """
    Parse the string form of a dictionary into a pandas Series.

    ``ast.literal_eval`` cannot evaluate the bare names ``nan`` and ``inf``,
    so every bare, optionally signed ``nan``/``inf`` token is first rewritten
    to the string literal ``'nan'``. Tokens that are part of a longer word or
    that already sit inside quotes (for example a key such as ``'inflow'``)
    are left alone.

    Parameters
    ----------
    s : str
        Text of a Python dict literal, e.g. ``"{'a': (1.0, nan)}"``.

    Returns
    -------
    pandas.Series
        One entry per dictionary key. Non-finite values come back as the
        string ``'nan'`` (not as a float NaN).

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the cleaned text is not a
        valid literal.
    """
    import re  # local import so the function does not depend on another cell

    # A match must not be glued to a word character or a quote on either
    # side; that is what separates a bare value from text inside a key.
    bare_nonfinite = r"(?<![\w'\"])[-+]?(?:nan|inf)(?![\w'\"])"
    cleaned = re.sub(bare_nonfinite, "'nan'", s.strip())
    return pd.Series(ast.literal_eval(cleaned))

In [7]:
def reformat_index_table(df):
    """
    Expand the dictionary-like string columns of ``df`` into tables.

    A column is expanded when its first entry is a string that starts with
    ``{`` (after stripping whitespace). Every entry of such a column is parsed
    with ``strip_diction``, giving one output column per dictionary key.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to scan. The name of every column is printed as it is visited.

    Returns
    -------
    (list, list)
        ``(snr_list, ids_list)``: the expanded tables of all dictionary
        columns other than ``'indices'``, and the expanded table(s) of the
        ``'indices'`` column with every cell passed through ``tuple``.
    """
    other_tables = []
    index_tables = []
    for column in df.columns:
        leading = df[column].iloc[0]
        print (column)
        # only columns whose first value looks like a dict literal are expanded
        looks_like_dict = isinstance(leading, str) and leading.strip().startswith("{")
        if not looks_like_dict:
            continue
        expanded = df[column].apply(strip_diction)
        if column == 'indices':
            index_tables.append(expanded.applymap(tuple))
        else:
            other_tables.append(expanded)

    return other_tables, index_tables

In [8]:
def replace(x):
    """
    Turn a missing scalar into a pair of NaNs.

    Parameters
    ----------
    x : object
        A single table cell.

    Returns
    -------
    object
        ``(nan, nan)`` when ``x`` is a float NaN; otherwise ``x`` itself.
        Non-NaN floats are returned unchanged (they previously fell through
        and came back as ``None``).
    """
    if isinstance(x, float) and np.isnan(x):
        return (np.nan, np.nan)
    return x

In [9]:
# Index column names taken from wisps.INDEX_NAMES.
# NOTE: this rebinds `cols`, which earlier held the column names used to read
# the raw index file.
cols=list(wisps.INDEX_NAMES)

In [10]:
# Expand the dictionary-valued columns of `ids` into separate columns.
snr_list, ids_list=reformat_index_table(ids)
# Join the two expansions on the row index, then turn the 'nan' strings that
# strip_diction produces back into real NaN values.
# NOTE(review): pd.concat stacks its inputs along rows, so this presumably
# relies on each list holding a single table; confirm.
fmtids=(pd.concat(snr_list).join(pd.concat((ids_list )))).replace('nan', np.nan)
# Cells of the index columns that are a bare NaN become a (nan, nan) tuple.
fmtids[cols]=fmtids[cols].applymap(lambda x: replace(x))
# Sanity check: the reshaped table should have as many rows as the raw one.
len(fmtids), len(ids)

name
snr
spex_chi
line_chi
spt
indices


(430413, 430413)

In [11]:
# `indices` is a second name for `fmtids` (no copy is made), so the columns
# added below also appear on `fmtids`.
indices=fmtids
# Bring the scalar columns of the raw table over to the expanded table.
indices[['name', 'spex_chi', 'line_chi', 'spt']]=ids[['name',  'spex_chi', 'line_chi', 'spt']]

In [12]:
#hst_phot.grism_id

In [13]:
from scipy import stats
def f_test(x, dfn=2, dfd=1):
    """
    Probability density of the F distribution evaluated at ``x``.

    Note that this is the density ``scipy.stats.f.pdf``, not a p-value or a
    test statistic.

    Parameters
    ----------
    x : float or array-like
        Value(s) at which to evaluate the density.
    dfn : float, optional
        Numerator degrees of freedom (default 2).
    dfd : float, optional
        Denominator degrees of freedom (default 1).

    Returns
    -------
    float or numpy.ndarray
        Density at ``x`` for location 0 and scale 1.
    """
    return stats.f.pdf(x, dfn, dfd, loc=0, scale=1)

In [14]:
def combined_wisp_hst_catalogs(hst3d_phot,wisp_phot, indices):
    """
    Combine the HST-3D and WISP photometry with the index table.

    The two photometry catalogs are stacked (HST-3D rows first, then WISP
    rows) into a common set of columns, de-duplicated on ``grism_id`` and
    inner-joined with ``indices`` on ``grism_id``. None of the three input
    frames is modified.

    Parameters
    ----------
    hst3d_phot : pandas.DataFrame
        Must contain ``grism_id``, ``ra_x``, ``dec_x``, ``F140_mag``,
        ``F140_mag_er``, ``F160_mag``, ``F160_mag_er`` and the flag columns
        ``use_phot_x``, ``f_cover``, ``f_flagged``, ``f_negative``,
        ``star_flag``. F110 magnitudes are always filled with NaN.
    wisp_phot : pandas.DataFrame
        Must contain ``grism_id``, ``RA``, ``DEC``, ``EXTRACTION_FLAG`` and,
        for each of 110/140/160, ``NIMCOS_<flt>W`` and ``NIMCOS_<flt>W_ER``.
    indices : pandas.DataFrame
        Must contain ``name`` (the part before ``.1D.ascii`` is used as the
        grism id), ``spex_chi`` and ``line_chi``.

    Returns
    -------
    pandas.DataFrame
        The columns of ``indices`` (``name`` renamed to ``grism_id``) plus
        ``F110``, ``F140``, ``F160`` (each a ``(magnitude, error)`` tuple),
        ``RA``, ``DEC``, ``survey``, ``flags``, ``x`` (``spex_chi`` divided by
        ``line_chi``) and ``f_test`` (``f_test`` evaluated on ``x``).

    Raises
    ------
    pandas.errors.MergeError
        If the join is not one-to-one on ``grism_id``.
    """
    # HST-3D has no F110 photometry. assign() returns a new frame, so the
    # caller's catalog is left untouched.
    hst3d = hst3d_phot.assign(F110_mag=np.nan, F110_mag_er=np.nan, survey='HST3D')

    # Combine the flags into one array per row. Passing the frame's own index
    # keeps the rows aligned even when that index is not 0..n-1.
    flag_columns = ['use_phot_x', 'f_cover', 'f_flagged', 'f_negative', 'star_flag']
    hst3d['flags'] = pd.Series(list(hst3d[flag_columns].values), index=hst3d.index)

    # rename() also returns a new frame.
    wisp = wisp_phot.rename(columns={'EXTRACTION_FLAG':'flags'}).assign(survey='WISP')

    # Use the spectrum file name without its '.1D.ascii' suffix (and without
    # surrounding white space) as the join key.
    indices = indices.rename(columns={'name':'grism_id'})
    indices['grism_id'] = indices['grism_id'].apply(lambda x: x.split('.1D.ascii')[0].strip())

    def stack(hst_values, wisp_values):
        # HST-3D first, then WISP; a fresh 0..n-1 index avoids duplicate labels.
        return pd.concat([hst_values, wisp_values], ignore_index=True)

    def paired(frame, value_column, error_column):
        # One (value, error) tuple per row.
        pairs = list(zip(frame[value_column], frame[error_column]))
        return pd.Series(pairs, index=frame.index, dtype=object)

    comb_phot = pd.DataFrame({'grism_id': stack(hst3d['grism_id'], wisp['grism_id'])})

    print(comb_phot.columns)
    for flt in ['110', '140', '160']:
        comb_phot['F'+flt] = stack(
            paired(hst3d, 'F'+flt+'_mag', 'F'+flt+'_mag_er'),
            paired(wisp, 'NIMCOS_'+flt+'W', 'NIMCOS_'+flt+'W_ER'),
        )

    comb_phot['RA'] = stack(hst3d['ra_x'], wisp['RA'])
    comb_phot['DEC'] = stack(hst3d['dec_x'], wisp['DEC'])
    comb_phot['survey'] = stack(hst3d['survey'], wisp['survey'])
    comb_phot['flags'] = stack(hst3d['flags'], wisp['flags'])

    # Stray white space in the ids would make the join miss rows.
    comb_phot['grism_id'] = comb_phot['grism_id'].str.strip()

    indices = indices.drop_duplicates(subset='grism_id')
    comb_phot = comb_phot.drop_duplicates(subset='grism_id')
    master_table = pd.merge(indices, comb_phot, on='grism_id', validate='one_to_one')

    # NOTE: objects that share a grism id (e.g. '0000') are collapsed to one
    # row by the de-duplication above.
    print (master_table.shape, comb_phot.shape, indices.shape)

    # Cast to float first so that zeros stored as text are also caught, then
    # replace exact zeros by a tiny value to avoid dividing by zero.
    chi_columns = ['spex_chi', 'line_chi']
    master_table[chi_columns] = master_table[chi_columns].astype(float)
    master_table.loc[master_table['line_chi'] == 0.0, 'line_chi'] = 1e-40

    master_table['x'] = master_table['spex_chi'] / master_table['line_chi']
    master_table['f_test'] = f_test(master_table['x'])

    return master_table

In [15]:
# Build the merged index + photometry table for both surveys.
mdf=combined_wisp_hst_catalogs(hst_phot, w_phot, indices)

Index(['grism_id'], dtype='object')
(270436, 26) (279595, 8) (430413, 19)


In [16]:
#get all the observation info for each field
obs=pd.read_csv(wisps.OUTPUT_FILES+'/observation_log.csv')
obs=obs.drop(columns=['Unnamed: 0']).drop_duplicates(subset='POINTING').reindex()

In [17]:
# Lower-case every column name of the observation log.
obs.columns=[x.lower() for x in obs.columns]

In [18]:
def get_pointing_name(grism_id):
    """
    Return the pointing part of a grism id.

    Ids that start with 'par' (in any letter case) are cut at the first '-';
    all other ids are cut at the first '-G'. An id without the delimiter is
    returned unchanged.
    """
    is_par_field = grism_id.lower().startswith('par')
    delimiter = '-' if is_par_field else '-G'
    return grism_id.partition(delimiter)[0]

def add_pointing_information(row):
    """
    Return ``row`` extended with 'pointing' and 'exposure' entries.

    The pointing is derived from ``row.grism_id`` with ``get_pointing_name``.
    The exposure is the 'exposure (s)' value of the first row of the
    module-level ``obs`` table whose ``pointing`` equals that name.

    Parameters
    ----------
    row : pandas.Series
        One table row; must have a ``grism_id`` entry.

    Returns
    -------
    pandas.Series
        A new Series: the entries of ``row`` followed by 'pointing' and
        'exposure'.

    Raises
    ------
    IndexError
        If no row of ``obs`` matches the pointing.
    """
    pointing = get_pointing_name(row.grism_id)
    exposure = obs.loc[obs.pointing.isin([pointing]), 'exposure (s)'].iloc[0]
    extra = pd.Series({'pointing': pointing, 'exposure': exposure})
    # pd.concat replaces Series.append, which was removed in pandas 2.0
    return pd.concat([row, extra])

In [19]:
#

In [20]:
# Add 'pointing' and 'exposure' to every row of the merged table. With axis=1
# add_pointing_information is called once per row.
mt=mdf.reset_index(drop=True).apply(add_pointing_information, axis=1)

In [21]:
# Store the merged table in the HDF5 file at wisps.COMBINED_PHOTO_SPECTRO_FILE
# under the key 'all_phot_spec_data'.
mt.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='all_phot_spec_data')

In [22]:
# Number of rows in the merged table (objects present in both the index table
# and the photometry).
len(mt)

270436

In [23]:
# Number of rows in the raw index table, for comparison with len(mt).
len(ids)

430413

In [24]:
# Keep the rows with snr2 above 5. The .copy() makes `idsonly` an independent
# frame, so adding a column below does not write to a slice of `indices`
# (which triggers pandas' SettingWithCopyWarning).
idsonly=indices[indices.snr2>5.0].copy()
# F-distribution density of the ratio spex_chi / line_chi.
idsonly['f']=f_test(idsonly.spex_chi.astype(float)/idsonly.line_chi.astype(float))

In [25]:
#wisps.Spectrum(filepath=wisps.REMOTE_FOLDER+'/aegis/aegis-01/1D/ASCII/aegis-01-G141_14735.1D.ascii')

In [26]:
# Keep the rows with f above 0.6, rename the 'name' column to 'Names' and store
# the result in the same HDF5 file under the key 'indices_only'.
idsonly[idsonly.f>0.6].rename(columns={'name':'Names'}).to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='indices_only')

In [27]:
# NOTE(review): isin(['par']) keeps only names that are exactly equal to 'par',
# which is why the result is empty. If the goal is to find names that contain
# or start with 'par', ids.name.str.contains('par') or
# ids.name.str.startswith('par') is needed instead; confirm intent.
ids.name[ids.name.isin(['par'])]

Series([], Name: name, dtype: object)

In [28]:
# Column names of the raw index table.
ids.columns

Index(['name', 'snr', 'spex_chi', 'line_chi', 'spt', 'indices'], dtype='object')

In [30]:
#hst_phot[hst_phot.grism_id.str.contains('aegis')]

In [2]:
import wisps
import pandas as pd 
import numpy as np
from astropy.io import ascii, fits

In [5]:
# Read the 3D-HST master photometry catalog (v4.1) with astropy's ascii reader
# and convert the table to a pandas DataFrame.
phot=ascii.read(wisps.REMOTE_PHOT_CATALOGS+'/3dhst_master.phot.v4.1/3dhst_master.phot.v4.1.cat').to_pandas()

In [6]:
# Column names available in the master photometry catalog.
phot.columns

Index(['id', 'field', 'ra', 'dec', 'x', 'y', 'z_spec', 'z_peak', 'faper_F140W',
       'eaper_F140W', 'faper_F160W', 'eaper_F160W', 'f_F606W', 'e_F606W',
       'f_F814W', 'e_F814W', 'f_F125W', 'e_F125W', 'f_F140W', 'e_F140W',
       'f_F160W', 'e_F160W', 'tot_cor', 'kron_radius', 'a_image', 'b_image',
       'flux_radius', 'fwhm_image', 'flags', 'f140w_flag', 'star_flag',
       'use_phot', 'near_star', 'nexp_f125w', 'nexp_f140w', 'nexp_f160w',
       'lmass', 'Av'],
      dtype='object')

In [7]:
# Show the folder that holds the remote photometry catalogs. (The trailing '.'
# left over from tab completion made this cell a SyntaxError.)
wisps.REMOTE_PHOT_CATALOGS

'/Volumes/caganze//catalogs//'