In [2]:
import numpy as np
from scipy.stats import skewnorm
import matplotlib.pyplot as plt
from astropy.io import fits
import os
from tqdm.notebook import tqdm

# MC Functions for Photometry

In [22]:
def _save_mc_histogram(z_vals, bins, title, count_range, out_prefix, n):
    """
    Plots the redshifts of one MC iteration as a histogram and saves the figure

    INPUTS:
        - z_vals (array)    = Redshifts of this MC iteration
        - bins (array)      = Bin edges of the histogram
        - title (str)       = Title of the plot
        - count_range (array)   = [min, max] (both inclusive) of the redshifts counted in the text box
        - out_prefix (str)  = Path the file name "run_{n}" is appended to
        - n (int)           = Index of the MC iteration (used in the file name)
    """
    target = out_prefix + f"run_{n}"

    # Create the output directory (and any missing parents) up-front rather than
    # catching every possible savefig error and guessing that the directory was missing
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    fig, ax = plt.subplots()
    bbox = dict(boxstyle='round', fc = "white", ec='k', alpha=0.5)
    ax.hist(z_vals, bins=bins)
    ax.set_title(title)
    n_gal = np.count_nonzero((count_range[0] <= z_vals) & (count_range[1] >= z_vals))
    ax.text(0.7, 0.9, f"# of Galaxies = {n_gal}", fontsize=7, bbox=bbox, transform=ax.transAxes)
    ax.set_xlabel("z")
    ax.set_ylabel("N")
    fig.savefig(target)
    plt.close(fig)      # close this figure explicitly so figures do not pile up over many iterations


def MCz(niter, zs, weights, z_range, MC_fn, plot_field="", plot_zrange="", verbose=False, **kwargs):
    """
    Performs a Monte Carlo on the redshift distribution of input galaxies

    INPUTS:
        - niter (int)   = Number of MC iterations to run
        - zs (array)    = List of median redshift values
        - weights (array)   = List of the MC weights for each object. A new z is drawn from the PDF if random_number >= weight
        - z_range (array)   = Range of redshifts to keep
        - MC_fn (fn)   = Python function used to generate the new redshift values for the galaxies.
                         Must return an array with one value per entry of zs
        - plot_field (str)    = Path to the directory where plots should be saved. If left as "", then no plots are saved
        - plot_zrange (str)    = Path to the directory where plots of galaxies in z_range should be saved.
                                 If left as "", then no plots are saved
        - verbose (bool)    = If you want to print the status bar via tqdm.notebook
        - **kwargs = Passed to MC_fn by keyword, so the names must match the parameter names of MC_fn

    OUTPUTS:
        - (array) --> Indices in redshift array of objects falling in z_range (exclusive bounds) at least once
        - (array) --> 2D array of redshifts of shape (len(zs), niter)
    """
    new_zs = [] # Fill with new redshifts

    iterable = tqdm(range(niter)) if verbose else range(niter)

    for n in iterable:

        z_in = np.copy(zs)      # work on a copy so the caller's redshifts are never modified

        # Indices of zs that need to be replaced
        new_idxs = np.where( np.random.random(size=len(zs)) >= weights )

        # Generate the set of redshifts. Keywords are forwarded by name so the result
        # does not depend on the order in which the caller happened to write them
        nz = MC_fn(**kwargs)

        z_in[new_idxs] = nz[new_idxs]    # Replace redshifts as dictated by the MC
        new_zs.append(z_in)

        if plot_field != "":
            _save_mc_histogram(z_in, np.arange(0,8,0.05),
                               f"Redshift Distribution of C20 -- (MC_iter {n})",
                               (0, 10), plot_field, n)

        if plot_zrange != "":
            # The histogram bins are fixed to 2 <= z < 3; only the galaxy count uses z_range
            _save_mc_histogram(z_in, np.arange(2,3,0.01),
                               f"Redshift Distribution of Field -- (MC_iter {n})",
                               z_range, plot_zrange, n)

    # Shape (niter, len(zs)); the reshape keeps the array 2D even when niter == 0
    new_zs = np.array(new_zs).reshape(niter, len(zs))

    ## Keep only galaxies that fell within correct z-range at least once ###
    z_bool = ((z_range[0]< new_zs) & (new_zs < z_range[1])).any(axis=0)
    good_idxs = np.where(z_bool)[0]     # Where the condition is met

    return good_idxs, new_zs.transpose()

In [4]:
def my_PDF(xs, l68, u68):
    """
    PDFs to draw the new redshifts from. Skewed-normal based on the confidence interval from COSMOS2020

    INPUTS:
        - xs (array)    = Median redshift values (used as the location of the skew-normal)
        - l68 (array)   = Distance from the median to the lower bound of the 68% confidence interval
        - u68 (array)   = Distance from the median to the upper bound of the 68% confidence interval
    OUTPUTS:
        - (array)   = New redshift values. Entries whose scale or skewness cannot be computed
                      (NaN input or an invalid interval) are returned as NaN; this function does
                      NOT insert -99, the caller has to flag those objects itself
    """
    # Accept lists / scalars as well as arrays; the inputs themselves are never modified
    xs = np.asarray(xs, dtype=float)
    l68 = np.asarray(l68, dtype=float)
    u68 = np.asarray(u68, dtype=float)

    # Invalid intervals give 0/0 or sqrt(<0); they are handled below, so silence the warnings here
    with np.errstate(invalid="ignore", divide="ignore"):
        omega = np.sqrt((l68**2 + u68**2)/2)        # Scale of skew-normal
        alpha = ((np.pi/2)**0.5 * (u68-l68)) / np.sqrt(2*l68**2 + 2*u68**2 - np.pi*(u68-l68)**2/2)  # Skewness

    bad = np.isnan(omega) | np.isnan(alpha)     # NaN in either parameter

    # Replace NaNs with 1 temporarily so that skewnorm.rvs can be evaluated for every object
    alpha = np.where(bad, 1.0, alpha)
    omega = np.where(bad, 1.0, omega)

    z_vals = skewnorm.rvs(a=alpha, loc=xs, scale=omega) # Find new zs based on skew-normal

    # Discard the draws made with the temporary parameters
    return np.where(bad, np.nan, z_vals)

---

----
---
---

# MC C20 Photometry

In [5]:
## READ IN FILE ##
cosmos_file = fits.open(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/COSMOS2020_CLASSIC_R1_v2.0.fits")
c20p = cosmos_file[1].data

## FIND BAD GALAXIES ##
# A galaxy is "bad" when lephare gives no redshift, no lower-68-percentile or no upper-68-percentile
missing_pz = (np.isnan(c20p["lp_zPDF"])
              | np.isnan(c20p["lp_zPDF_l68"])
              | np.isnan(c20p["lp_zPDF_u68"]))
bad_ids = np.flatnonzero(missing_pz)

print(f"Number of galaxies = {len(c20p)}")
print(f"Number of bad galaxies = {len(bad_ids)}")

## INSERT TEMP DATA IN BAD IDs ##
# NOTE: this overwrites the in-memory catalog. The bad rows only carry placeholder values from
# here on; bad_ids has to be used afterwards to flag their results
for column, placeholder in (("lp_zPDF", 2), ("lp_zPDF_l68", 1.9), ("lp_zPDF_u68", 2.1)):
    c20p[column][bad_ids] = placeholder

Number of galaxies = 1720700
Number of bad galaxies = 19258


In [6]:
#### RUN THE MC ####
# Four independent runs of 250 iterations each; every run is written to its own .npy file
for run in range(4):
    # ---------------- settings of this run ----------------
    niter = 250      # Number of iterations
    z_range = [2,3]  # Redshift range of interest
    plot_field = f"./MC_iterations/c20p_total_{run}/"
    plot_zrange = f"./MC_iterations/c20p_Hyper_{run}/"

    # Median photo-z and the distances from the median to the 68% bounds
    phot_med = c20p["lp_zPDF"]
    phot_l68 = phot_med - c20p["lp_zPDF_l68"]
    phot_u68 = c20p["lp_zPDF_u68"] - phot_med

    ## MC ##
    # A weight of 0 for every galaxy means that each redshift is redrawn in every iteration
    always_redraw = np.zeros(len(c20p))
    phot_ids, new_pzs = MCz(niter, phot_med, always_redraw, z_range, my_PDF,
                            plot_field=plot_field, plot_zrange=plot_zrange, verbose=True,
                            xs=phot_med, l68=phot_l68, u68=phot_u68)

    ## Update bad galaxies ##
    # Rows that were run with placeholder parameters are flagged with -99 in every iteration
    new_pzs[bad_ids] = -99

    ## WRITE TO RESULT FILE ##
    # First field of the catalog dtype (used as "ID" below) + one float column per iteration
    iter_names = [f"MC_iter{n}" for n in range(niter)]
    dtypes = [c20p.dtype.descr[0]] + [(name, ">f8") for name in iter_names]

    write_arr = np.zeros(shape=(len(c20p)), dtype=dtypes)
    write_arr["ID"] = c20p["ID"]
    for col, name in enumerate(iter_names):
        write_arr[name] = new_pzs[:, col]

    np.save(rf"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20_MC_250_{run}.npy", write_arr)

0
50
100
150
200


---

# MC Spectra and Grism 

In [3]:
## LOAD DATA ##
def _numeric_where_possible(catalog):
    """
    Converts every column of a 2D text catalog to float32 where possible.

    Columns that cannot be parsed as numbers are kept unchanged. Returns a 2D object array
    with the same (rows, columns) layout as the input.
    """
    columns = []
    for idx in range(catalog.shape[1]):
        try:
            col = catalog[:, idx].astype(np.float32)
        except (ValueError, TypeError):
            # Non-numeric column: keep the raw entries. Only conversion failures are
            # caught, so interrupts and genuine bugs are no longer swallowed
            col = catalog[:, idx]
        columns.append(col)

    return np.transpose(np.array(columns, dtype=object))


# PHOTO-Zs
cosmos_file = fits.open(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/COSMOS2020_CLASSIC_R1_v2.0.fits")
c20p = cosmos_file[1].data

# SPECTRA
specz_cat = np.loadtxt("./Data/master_specz_COSMOS_BF_v4b.cat", dtype=object)   # Load in the data
c20s = _numeric_where_possible(specz_cat)   # Fix up the formatting for the spec data-file

miss_spec = np.where(c20s[:,0] == -99)[0]   # spectra not in the cosmos catalog (column 0 is -99)

print(f"Number of C20 spectra: {c20s.shape[0]}")
print(f"Number of missing spectra: {len(miss_spec)}")

# ----------------------------------------------------------------------
# ----------------------------------------------------------------------

# GRISM
griz_cat = np.loadtxt("./Data/HST_Hyp.v1.1.cat", skiprows=1, usecols=range(15), dtype=object)   # Load in the data
griz = _numeric_where_possible(griz_cat)    # Fix up the formatting for the grism data-file

miss_griz = np.where(griz[:,4] == -99)[0]   # grism objects not in the cosmos catalog (column 4 is -99)

print(f"Number of Grism redshifts: {griz.shape[0]}")
print(f"Number of missing grisms: {len(miss_griz)}")

Number of C20 spectra: 42776
Number of missing spectra: 2562
Number of Grism redshifts: 12764
Number of missing grisms: 53


In [4]:
## FIND COMMON OBJECTS ##
# Row of the FIRST occurrence of every cosmos ID in the spectrum catalog. One pass over the
# catalog replaces a full scan of it for every grism object
first_spec_row = {}
for s_row, spec_cid in enumerate(c20s[:,0]):
    first_spec_row.setdefault(spec_cid, s_row)

gids = []       # idx in grism catalog of common object
sids = []       # idx in spectrum catalog of common object

sim_objs = []   # Keep track of info of the object for MC use --> [C20_ID, zs, qf_s, zg, qf_gz]

for g_id, c_id in enumerate(griz[:,4]):
    if c_id > 0:    # Make sure it's a cosmos object
        s_id = first_spec_row.get(c_id)

        if s_id is not None: # Object is in both
            gids.append(g_id)   # Add grism id
            sids.append(s_id)   # Add spec id
            sim_objs.append([c_id, griz[g_id][9], griz[g_id][10], griz[g_id][12], griz[g_id][14]])


# Create unique catalogs (rows of the common objects removed from both catalogs)
spec_unique = np.delete(c20s, sids, axis=0)
griz_unique = np.delete(griz, gids, axis=0)
sim_objs = np.array(sim_objs)

print(f"Unique Grizli Objects = {len(griz_unique)}")
print(f"Unique Spec Objects = {len(spec_unique)}")
print(f"Common Objects = {len(sim_objs)}")

Unique Grizli Objects = 10053
Unique Spec Objects = 40106
Common Objects = 2711


In [40]:
## PREP STORAGE ARRAY ##
niter = 1000

# First field of the photometric catalog dtype (accessed as "ID" further down),
# followed by one float column per MC iteration
mc_columns = [(f"MC_iter{n}", ">f8") for n in range(niter)]
dtypes = [c20p.dtype.descr[0], *mc_columns]

# One row per object: unique spectra, unique grism objects and objects common to both
n_rows = len(spec_unique) + len(griz_unique) + len(sim_objs)
spec_mc = np.zeros(n_rows, dtype=dtypes)

## MC C20 Spectra

In [41]:
## Pack parameters of spectra from cosmos catalog ##
spec_cids = spec_unique[:,0].astype(int)
in_cosmos = spec_cids != -99        # -99 --> not a cosmos object

# Same lookup as before: cosmos ID c is read from row c-1 of the photometric catalog
# (assumes the catalog rows are ordered by ID -- TODO confirm). Non-cosmos objects read a
# dummy row 0 and are masked out below
rows = np.where(in_cosmos, spec_cids - 1, 0)

pz_med = c20p["lp_zPDF"][rows]
pz_l68 = pz_med - c20p["lp_zPDF_l68"][rows]
pz_u68 = c20p["lp_zPDF_u68"][rows] - pz_med

# Bad = not a cosmos object, or the p(z) contains a NaN
bad_mask = ~in_cosmos | np.isnan(pz_med) | np.isnan(pz_l68) | np.isnan(pz_u68)

gal_params = np.column_stack([pz_med, pz_l68, pz_u68]).astype(float)
gal_params[bad_mask] = [2, 1.9, 2.1]    # make up temporary parameters

# Integer index array even when there are no bad objects. An empty float array
# (np.array([])) cannot be used as an index later on
bad_spec = np.flatnonzero(bad_mask)
print(f"Number of bad p(z)'s for specta: {len(bad_spec)}")

Number of bad p(z)'s for specta: 3911


In [42]:
#### RUN THE MC ####
# ---------------- settings ----------------
z_range = [2,3]         # Redshift range of interest
plot_field = "./MC_iterations/c20s_total/"
plot_zrange = "./MC_iterations/c20s_Hyper/"

spec_z = spec_unique[:,11]         # orginal spec-z

# parameters for the p(z): median, lower and upper 68% distances
spec_med, spec_l68, spec_u68 = gal_params[:,0], gal_params[:,1], gal_params[:,2]

# Set the MC weights based on the quality flags (last digit of the flag)
qfs = spec_unique[:,13] % 10
flag_2 = (qfs >= 2.) & (qfs < 3.)
flag_9 = (qfs >= 9.) & (qfs < 10.)
flag_3_or_4 = (qfs >= 3.) & (qfs < 5.)
spec_weights = np.select([flag_2, flag_9, flag_3_or_4],
                         [0.7, 0.7, 0.993],
                         default=0)     # every other flag: always redraw from the p(z)

## MC ##
spec_ids, new_szs = MCz(niter, spec_z, spec_weights, z_range, my_PDF,
                        plot_field=plot_field, plot_zrange=plot_zrange, verbose=True,
                        xs=spec_med, l68=spec_l68, u68=spec_u68)

## Update bad galaxies ##
# Objects that were run with temporary p(z) parameters are flagged with -99 in every iteration
new_szs[bad_spec] = -99

## WRITE TO RESULT ARRAY ##
# The unique spectra fill the first len(new_szs) rows of spec_mc
n_spec = len(new_szs)
spec_mc["ID"][:n_spec] = spec_unique[:,0]     # update with cosmos IDs

for n in range(niter):
    spec_mc[f"MC_iter{n}"][:n_spec] = new_szs[:,n]

  0%|          | 0/1000 [00:00<?, ?it/s]

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


---

## MC Grizli Spectra

In [43]:
## Pack parameters of grism objects from cosmos catalog ##
griz_cids = griz_unique[:,4].astype(int)
in_cosmos = griz_cids != -99        # -99 --> not a cosmos object

# Same lookup as before: cosmos ID c is read from row c-1 of the photometric catalog
# (assumes the catalog rows are ordered by ID -- TODO confirm). Non-cosmos objects read a
# dummy row 0 and are masked out below
rows = np.where(in_cosmos, griz_cids - 1, 0)

pz_med = c20p["lp_zPDF"][rows]
pz_l68 = pz_med - c20p["lp_zPDF_l68"][rows]
pz_u68 = c20p["lp_zPDF_u68"][rows] - pz_med

# Bad = not a cosmos object, or the p(z) contains a NaN
bad_mask = ~in_cosmos | np.isnan(pz_med) | np.isnan(pz_l68) | np.isnan(pz_u68)

griz_params = np.column_stack([pz_med, pz_l68, pz_u68]).astype(float)
griz_params[bad_mask] = [2, 1.9, 2.1]    # make up temporary parameters

# Integer index array even when there are no bad objects. An empty float array
# (np.array([])) cannot be used as an index later on
bad_griz = np.flatnonzero(bad_mask)
print(f"Number of bad p(z)'s for specta: {len(bad_griz)}")

Number of bad p(z)'s for specta: 485


In [44]:
#### RUN THE MC ####
# ---------------- settings ----------------
z_range = [2,3]         # Redshift range of interest
plot_field = "./MC_iterations/griz_total/"
plot_zrange = "./MC_iterations/griz_Hyper/"

griz_z = griz_unique[:,12].astype(float)         # orginal grism-z
griz_width = 46/14100*(1+griz_z)       # Width of the normal distribution to draw from

# parameters for the p(z): median, lower and upper 68% distances
griz_med, griz_l68, griz_u68 = griz_params[:,0], griz_params[:,1], griz_params[:,2]

# Set the MC weights based on the quality flags (last column of the grism catalog)
qfs = griz_unique[:,-1]
griz_weights = np.select([qfs==5, qfs==4, qfs==3],
                         [0.925, 0.818, 0.668],
                         default=0)     # every other flag: always redraw from the p(z)

# The unique grism objects are stored right after the unique spectra in spec_mc
griz_rows = slice(len(new_szs), len(new_szs) + len(griz_unique))
spec_mc["ID"][griz_rows] = griz_unique[:,4]     # update with cosmos IDs

for n in tqdm(range(niter)):

    # Scatter the grism redshifts first, then run a single MC iteration on the scattered values
    gzs = np.random.normal(griz_z, griz_width)

    ## MC ##
    # NOTE: MCz is called with niter=1, so every call saves its plots under the name "run_0"
    griz_ids, new_g = MCz(1, gzs, griz_weights, z_range, my_PDF,
                          plot_field=plot_field, plot_zrange=plot_zrange, verbose=False,
                          xs=griz_med, l68=griz_l68, u68=griz_u68)

    new_gzs = new_g[:, 0].copy()    # (len(gzs), 1) --> 1D copy
    new_gzs[bad_griz] = -99         # flag objects that used temporary p(z) parameters

    spec_mc[f"MC_iter{n}"][griz_rows] = new_gzs

  0%|          | 0/1000 [00:00<?, ?it/s]

---

## MC Common Objects

In [45]:
## Pack parameters of common objects from cosmos catalog ##
com_cids = sim_objs[:,0].astype(int)

# Same lookup as before: cosmos ID c is read from row c-1 of the photometric catalog
# (assumes the catalog rows are ordered by ID -- TODO confirm)
rows = com_cids - 1

pz_med = c20p["lp_zPDF"][rows]
pz_l68 = pz_med - c20p["lp_zPDF_l68"][rows]
pz_u68 = c20p["lp_zPDF_u68"][rows] - pz_med

# Bad = the p(z) contains a NaN
bad_mask = np.isnan(pz_med) | np.isnan(pz_l68) | np.isnan(pz_u68)

com_params = np.column_stack([pz_med, pz_l68, pz_u68]).astype(float)
com_params[bad_mask] = [2, 1.9, 2.1]    # make up temporary parameters

# Integer index array even when there are no bad objects. An empty float array
# (np.array([])) cannot be used as an index later on
bad_com = np.flatnonzero(bad_mask)
print(f"Number of bad p(z)'s for specta: {len(bad_com)}")

Number of bad p(z)'s for specta: 105


In [64]:
#### RUN THE MC ####
# ========================================================
# ========================================================
z_range = [2,3]         # Redshift range of interest
plot_field = "./MC_iterations/sim_total/"
plot_zrange = "./MC_iterations/sim_Hyper/"

com_med = com_params[:,0]  # parameters for the p(z)
com_l68 = com_params[:,1]
com_u68 = com_params[:,2]


## Weights

# Spectra weights (based on the last digit of the quality flag)
qfs = sim_objs[:,2] % 10
spec_weights = np.select( [(qfs >=2.)&(qfs<3.),(qfs>=9.)&(qfs<10.), (qfs>=3.)&(qfs<5.) ],
                [0.7, 0.7, 0.993],
                default=0)

# Grizli weights
qfg = sim_objs[:,-1]
griz_weights = np.select( [qfg==5, qfg==4, qfg==3 ],
                [0.925, 0.818, 0.668],
                default=0)

# Combine --> column 0 = spectrum, column 1 = grism
sim_weights = np.c_[spec_weights, griz_weights]

# Keep track of which flag is higher (0 = spectrum, 1 = grism; ties go to the spectrum)
max_id = np.argmax(sim_weights, axis=1)

# Sort the weights --> column 0 = lower weight, column 1 = higher weight
sim_weights = np.sort(sim_weights, axis=1)
w_low = sim_weights[:,0]
w_high = sim_weights[:,1]

# The common objects fill the last rows of spec_mc
com_rows = slice(len(new_szs) + len(griz_unique), None)
spec_mc["ID"][com_rows] = sim_objs[:,0]     # update with cosmos IDs
# ========================================================
# ========================================================
# ========================================================

# Use niter (not a hard-coded 1000) so the loop always matches the MC_iter columns of spec_mc
for n in tqdm(range(niter)):

    # Draw random number for each object:
    mc_rns = np.random.random(size=len(sim_weights))

    # Choose specz (0), griz (1), or photoz (2):
    #   rn < w_high                             --> redshift with the better flag
    #   w_high <= rn < w_high + w_low*(1-w_high) --> redshift with the worse flag
    #   otherwise                               --> photoz
    use_better = mc_rns < w_high
    use_worse = ~use_better & (mc_rns < w_high + w_low*(1 - w_high))
    # z_choice has to be an integer ARRAY: comparing a Python list with a number gives a single
    # False, which made np.select below return 0 for every object
    z_choice = np.select([use_better, use_worse], [max_id, 1 - max_id], default=2)

    # Make random grism redshifts
    g_rand = np.random.normal(sim_objs[:,3], 46/14100*(1+sim_objs[:,3]) )

    # Pick which redshift to use (2 is only a placeholder for objects that get a photoz draw)
    z_meds = np.select([z_choice == 0, z_choice == 1],
                       [sim_objs[:,1],  g_rand],
                       default=2)

    # Assign weights: 0 --> always replaced by a p(z) draw, 1 --> chosen redshift is always kept
    ws = np.where(z_choice == 2, 0, 1)


    ## MC ##
    # NOTE: MCz is called with niter=1, so every call saves its plots under the name "run_0"
    _, new_sim = MCz(1, z_meds, ws, z_range, my_PDF, plot_field,
                        plot_zrange, verbose=False, xs = com_med, l68 = com_l68, u68 = com_u68)

    new_simz = new_sim.flatten()
    new_simz[bad_com] = -99     # flag objects that used temporary p(z) parameters

    spec_mc[f"MC_iter{n}"][com_rows] = new_simz

  0%|          | 0/1000 [00:00<?, ?it/s]

In [65]:
# Write the combined spec / grism / common-object MC table to disk
np.save(file=r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/MC_spec.npy", arr=spec_mc)

----
----
----


# Post-processing: sort the spectra by COSMOS ID and add the original redshifts to the saved MC tables

In [66]:
### Sort spectra by cosmos ID ###
# A structured array is sorted field by field in dtype order, i.e. by "ID" first;
# rows with the same ID are ordered by their MC columns
sorted_spec = spec_mc.copy()
sorted_spec.sort()

In [74]:
niter = 1000

dtypes = [c20p.dtype.descr[0]] + [("zs", ">f8"), ("zg", ">f8")] + [(f"MC_iter{n}", ">f8") for n in range(niter)]

# Make array to fill
final_spec = np.zeros(shape=(len(sorted_spec)), dtype=dtypes)

final_spec["ID"] = sorted_spec["ID"]


def _first_value_by_id(ids, values):
    """Maps every ID to the value found in the first row in which that ID occurs."""
    lookup = {}
    for key, val in zip(ids, values):
        lookup.setdefault(key, val)
    return lookup


# Add original redshifts
# One dictionary per catalog replaces a full catalog scan for every single object
spec_z_by_id = _first_value_by_id(c20s[:,0], c20s[:,11])    # cosmos ID --> original specz
griz_z_by_id = _first_value_by_id(griz[:,4], griz[:,12])    # cosmos ID --> original griz-z

zs = []
zg = []

for obj_id in final_spec["ID"].tolist():
    if obj_id > 0:
        zs.append(spec_z_by_id.get(obj_id, -99))
        zg.append(griz_z_by_id.get(obj_id, -99))
    else:
        # NOTE: objects without a cosmos ID (e.g. -99) cannot be matched by ID. Looking them up
        # anyway would give every one of them the redshift of the first ID=-99 row of each
        # catalog, so they are marked as unavailable instead
        zs.append(-99)
        zg.append(-99)

final_spec["zs"] = zs
final_spec["zg"] = zg

for n in tqdm(range(niter)):
    final_spec[f"MC_iter{n}"] = sorted_spec[f"MC_iter{n}"]

np.save(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20spec_MC_1000.npy", final_spec)


  0%|          | 0/52870 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

In [73]:
# Re-write the four photometry MC files with the original lephare redshift added as a column
cosmos_file = fits.open(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/COSMOS2020_CLASSIC_R1_v2.0.fits")
c20p = cosmos_file[1].data

niter = 250

# Same layout for every run, so build it once: ID, original photo-z, one column per MC iteration
dtypes = [c20p.dtype.descr[0]] + [("lp_zPDF", ">f8")] + [(f"MC_iter{n}", ">f8") for n in range(niter)]

for run in range(4):

    mcs = np.load(rf"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20_MC_250_{run}.npy")

    # Make array to fill
    final = np.zeros(shape=(len(mcs)), dtype=dtypes)

    final["ID"] = mcs["ID"]

    # Copied row by row: assumes the saved file has the same row order as the catalog -- TODO confirm
    final["lp_zPDF"] = c20p["lp_zPDF"]

    for n in tqdm(range(niter)):
        final[f"MC_iter{n}"] = mcs[f"MC_iter{n}"]

    # np.save returns None, so its result is no longer assigned back to mcs
    np.save(rf"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20_MC_250_{run}_redo.npy", final)



  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]