In [1]:
#from astropy.io import ascii
#from astropy.table import Table
import os
from astropy.io import ascii
from astropy.table import Table
import pandas as pd
import numpy as np

In [2]:
# Declerations
failed_cdfs_dir = os.path.join('..', 'ZFOURGE', 'data', 'cdfs_cigale_failed_ids') # List of failed galaxies by ZFOURGE ID, seperated by \n
cdfs_dir = os.path.join('..', 'ZFOURGE', 'data', 'cdfs', 'cdfs.v1.6.11.cat') # ZFOURGE CDFS field photometry data
cdfs_z_dir = os.path.join('..', 'ZFOURGE', 'data', 'cdfs', 'cdfs.v1.6.9.zout') # Output of Eazy, contains redshift data
filter_match = os.path.join('..', 'ZFOURGE', 'data', 'filter_match_table') # An ASCII table with the ZFOURGE filter name in 1st collumn, and filter transmission file name in 2nd col.
output_dir = os.path.join('..', 'ZFOURGE', 'data', 'working_sample') # Directory for output ascii table

output_failed_galaxies = False # True: will output all galaxies with ids found in list of failed ids.
only_zfourge_filters = True # True: will only use j1, j2, j3, hs, hl, and ks bands. False: uses most avalbile bands
add_random_successful_galaxy = True # True: will add a randomly selected successful galaxy to output
use_z_spec = True # True: will replace redshift in output with z_spec if value != -99.0
manual_galaxy_ids = [44] # Optional list of manually selected galaxy ids to add to output. Expected to be non-failed galxies.

In [3]:
def Read_Ascii(dir):
    # Returns dataframe
    
    with open(dir, 'r') as table:
        header = table.readline()[1:].split() # slice to skip over # character
        #header = [''.join([ch if ch.isalpha() else '' for ch in elem]) for elem in header] # removes non-alphabt characters such as _
    return pd.read_csv(dir, names=header, encoding='ascii', comment='#', delim_whitespace=True)

In [4]:
with open(failed_cdfs_dir, 'r') as failed_ids:
    failed_cdfs = [int(id) for id in failed_ids.readlines()[1:]] # skip header row

cdfs_table = Read_Ascii(cdfs_dir)
print(cdfs_table)

          id         x         y         ra        dec  SEflags  iso_area  \
0          1  4078.527   203.090  53.100105 -27.966728       19    5484.0   
1          2  6292.975    55.927  52.995632 -27.972803        2     315.0   
2          3  1791.772    51.199  53.207989 -27.973034        2     375.0   
3          4  4181.315   176.410  53.095257 -27.967840        3    1060.0   
4          5  5997.950    48.860  53.009548 -27.973110        0     219.0   
...      ...       ...       ...        ...        ...      ...       ...   
30906  30907  4150.499  7668.344  53.096783 -27.655676        0     124.0   
30907  30908  3050.447  7621.582  53.148533 -27.657625        0     195.0   
30908  30909  4385.049  7547.789  53.085751 -27.660698        0      18.0   
30909  30910  3221.845  7527.777  53.140469 -27.661535        0      36.0   
30910  30911  3362.124  7855.663  53.133869 -27.647873        0      68.0   

       fap_Ksall  eap_Ksall    apcorr  ...  wmin_jhk  wmin_hst  wmin_irac  

In [5]:
# Adds failed galxies when true
if output_failed_galaxies == True:
    failed_cdfs_table = cdfs_table[cdfs_table['id'].isin(failed_cdfs)]
        
# Adds manually selected galaxies when provided
failed_cdfs_table = pd.concat([failed_cdfs_table, 
                               cdfs_table[cdfs_table['id'].isin(manual_galaxy_ids)]], 
                               ignore_index=True)
if any(item in failed_cdfs for item in manual_galaxy_ids):
    print("Manually selected galaxy is in list of galaxies. This galaxy may appear twice in output.")

# Adds random galaxy not already included when true
if add_random_successful_galaxy == True:
    i = int(np.random.rand()*len(cdfs_table)) # i <- [0, 30911)
    while i+1 in failed_cdfs: # +1 b/c row number (i) is 1 less than id number.
        i = int(np.random.rand()*len(cdfs_table))
    failed_cdfs_table = pd.concat([failed_cdfs_table, cdfs_table.loc[i:i]], ignore_index=True)

failed_cdfs_table.sort_values('id') # **why doesn't this work**
print(failed_cdfs_table)

        id         x         y         ra        dec  SEflags  iso_area  \
0        4  4181.315   176.410  53.095257 -27.967840        3    1060.0   
1       64  4111.995   178.501  53.098526 -27.967752        3      93.0   
2       85  6439.011   226.945  52.988751 -27.965671        3     158.0   
3      283  5402.015   365.498  53.037674 -27.959938        0      16.0   
4      322  2583.467   476.946  53.170631 -27.955313       19     352.0   
..     ...       ...       ...        ...        ...      ...       ...   
552  30767  5007.865  7492.322  53.056450 -27.662998        3      35.0   
553  30845  2812.941  7915.292  53.159702 -27.645386        0      88.0   
554  30886  4189.733  7594.125  53.094936 -27.658770        3     387.0   
555     44  5235.337   240.340  53.045532 -27.965157        0     569.0   
556  19610  5753.652  4896.251  53.021263 -27.771145        0       8.0   

       fap_Ksall  eap_Ksall    apcorr  ...  wmin_jhk  wmin_hst  wmin_irac  \
0      76.361450   0.2

In [6]:
# Access and parse redshift data
cdfs_z_table = Read_Ascii(cdfs_z_dir)
failed_cdfs_z_table = cdfs_z_table[cdfs_z_table['id'].isin(failed_cdfs_table['id'])]
print(failed_cdfs_z_table)

          id  z_spec    z_a   z_m1       chi_a    z_p         chi_p   z_m2  \
3          4   -99.0  3.812  3.815   662.38530  3.812  6.623853e+02  3.561   
43        44   -99.0  0.062  0.060    14.13757  0.062  1.413757e+01  0.060   
63        64   -99.0  7.370  8.492  1340.59100  7.370  1.340591e+03  7.327   
84        85   -99.0  0.356  0.338    92.12006  0.362  9.214599e+01  0.343   
282      283   -99.0  1.000  0.948    13.45585  1.000  1.345585e+01  0.957   
...      ...     ...    ...    ...         ...    ...           ...    ...   
30713  30714   -99.0  6.207  6.207  1070.11400  0.005  1.000000e+08  0.005   
30765  30766   -99.0  3.041  3.049   440.28820  3.041  4.402882e+02  3.041   
30766  30767   -99.0  2.883  2.882   236.87310  2.883  2.368731e+02  2.882   
30844  30845   -99.0  6.923  6.923   121.09890  6.923  1.210989e+02  6.923   
30885  30886   -99.0  5.206  5.213   926.91580  0.005  1.000000e+08  0.005   

        odds    l68    u68    l95    u95    l99    u99  nfilt  

In [19]:
# Defines output table and adds id and redshift columns
output = pd.merge(failed_cdfs_table, failed_cdfs_z_table, how='left', on='id')
output = output[['id', 'z_peak']] # not very efficient to delete most of the dataframe just to add everything back later
print(output)

        id  z_peak
0        4  3.8120
1       64  7.3259
2       85  0.3429
3      283  0.9579
4      322  1.6893
..     ...     ...
552  30767  2.8805
553  30845  6.9235
554  30886  0.0050
555     44  0.0604
556  19610  0.4491

[557 rows x 2 columns]


In [20]:
# Reads matching table
filter_table = Read_Ascii(filter_match)
print(filter_table)

      ZFOURGE                                  CIGALE
0      z_peak                                redshift
1         f_B      hst-ACS_update_sep07-wfc_f435w_t81
2         e_B  hst-ACS_update_sep07-wfc_f435w_t81_err
3         f_I      hst-ACS_update_sep07-wfc_f775w_t81
4         e_I  hst-ACS_update_sep07-wfc_f775w_t81_err
..        ...                                     ...
76  e_IRAC_45            IRAC-irac_tr2_2004-08-09_err
77  f_IRAC_58                IRAC-irac_tr3_2004-08-09
78  e_IRAC_58            IRAC-irac_tr3_2004-08-09_err
79  f_IRAC_80                IRAC-irac_tr4_2004-08-09
80  e_IRAC_80            IRAC-irac_tr4_2004-08-09_err

[81 rows x 2 columns]


In [21]:
# Cell adds FourStar filters only
if only_zfourge_filters == True:
    zf_with_ids = pd.concat([failed_cdfs_table['id'], 
                            failed_cdfs_table.loc[:, filter_table.loc[13:24, 'ZFOURGE']]],
                            axis=1)
    # this is done so that the merge can be done by the id column
    output = pd.merge(output, zf_with_ids, how='left', on='id') # do not run this more than once!!!
    print(output)

        id  z_peak       f_Hs       e_Hs       f_Hl       e_Hl       f_J1  \
0        4  3.8120 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
1       64  7.3259 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
2       85  0.3429 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
3      283  0.9579 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
4      322  1.6893 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
..     ...     ...        ...        ...        ...        ...        ...   
552  30767  2.8805 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
553  30845  6.9235   9.841538   1.188658   8.892500   1.012411   6.490682   
554  30886  0.0050 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
555     44  0.0604 -99.000000 -99.000000 -99.000000 -99.000000 -99.000000   
556  19610  0.4491   0.271997   0.226003   0.152502   0.239155   0.266472   

          e_J1       f_J2       e_J2       f_J3       e_J3       f_Ks  \
0 

In [None]:
# Cell adds (mostly) all filters
#if only_zfourge_filters == False:
#    for i in range(2, len(filter_table)): # goes through every filter in the match table
#        output.add_column(failed_cdfs_table[filter_table['ZFOURGE'][i]], name = filter_table['CIGALE'][i])

if only_zfourge_filters == False:
    all_with_ids = pd.concat([failed_cdfs_table['id'], 
                            failed_cdfs_table.loc[:, filter_table['ZFOURGE']]],
                            axis=1)
    # this is done so that the merge can be done by the id column
    output = pd.merge(output, zf_with_ids, how='left', on='id') # do not run this more than once!!!
    print(output)

In [26]:
# Renames output to CIGALE format using matching table

#output.rename(columns={'id':'id', 'z_peak':'redshift', 'foo':'bar'}, inplace=True)
# TODO ^^
print(failed_cdfs_table.loc[:, filter_table['ZFOURGE']])

KeyError: "['z_peak'] not in index"

In [None]:
# replaces photo z with z_spec if avalible
for i in range(len(output)):
    if cdfs_z_table['z_spec'][output['id'][i]-1] > -99.0:
        output['redshift'][i] = cdfs_z_table['z_spec'][output['id'][i]-1]

In [None]:
# Convert units of flux to mJy, and sets -99 to NaN
for i in output.columns[2:]:
    for j in range(len(i)):
        if output[i][j] == -99:
            output[i][j] = np.nan
    output[i] = output[i] * 0.3631 / 1000

In [None]:
output.write(output_dir, format='ascii', overwrite=True)

In [None]:
output