In [2]:
import numpy as np
from scipy.stats import skewnorm
import matplotlib.pyplot as plt
from astropy.io import fits
import os

In [3]:
def MCz(niter, zs, qfs, z_range, MC_fn, plot_field="", plot_zrange="", verbose=False, **kwargs):
    """
    Performs a Monte Carlo on the redshift distribution of input galaxies

    On every iteration each galaxy keeps its input redshift with a probability set by
    its quality flag; otherwise the redshift is replaced by a fresh draw from MC_fn.

    INPUTS:
        - niter (int)   = Number of MC iterations to run
        - zs (array)    = List of median redshift values
        - qfs (array)    = Quality flags that dictate the weights of the MC. Photoz's have qf=0
        - z_range (array)   = Range of redshifts to keep, as [z_min, z_max]
        - MC_fn (fn)   = Python function used to generate the new redshift values for the galaxies.
                         Must return an array that can be indexed like zs.
        - plot_field (str)    = Path prefix (normally a directory ending in "/") where plots should be
                                saved. If left as "", then no plots are saved
        - plot_zrange (str)    = Path prefix where plots of galaxies in z_range should be saved.
                                 If left as "", then no plots are saved
        - verbose (bool)    = If you want to print every 50 iterations as a progress report
        - **kwargs = For MC_fn. NOTE: the values are handed to MC_fn POSITIONALLY, in the order the
                     keywords were written in the call, so that order must match MC_fn's signature.

    OUTPUTS:
        - (array) --> Indices in redshift array of objects falling in z_range at least once
        - (array) --> 2D array of redshifts of shape (len(zs), niter)
    """
    # Probability of KEEPING the input redshift, chosen from the last digit of the quality flag.
    # Flags <= 0 (and any unlisted digit) get 0, i.e. the redshift is always redrawn.
    # The flags never change between iterations, so this is computed once.
    weights = np.select( [(qfs>0)&(qfs%10==2), (qfs>0)&(qfs%10==9), (qfs>0)&(qfs%10==3), (qfs>0)&(qfs%10==4) ],
                [0.7, 0.7, 0.993, 0.993],
                default=0)

    new_zs = [] # Fill with new redshifts

    for n in range(niter):
        if verbose and (n%50==0): print(n)
        z_in = np.copy(zs)      # Work on a copy so the caller's redshifts are never modified

        # A galaxy is redrawn when its uniform random number reaches its keep-probability
        replace = np.random.random(size=len(zs)) >= weights

        nz = MC_fn(*kwargs.values())        # Generate the set of redshifts

        z_in[replace] = nz[replace]    # Replace redshifts as dictated by the MC
        new_zs.append(z_in)

        if plot_field != "":
            # Whole-field histogram; the quoted count is the number of galaxies with 0 <= z <= 10
            _save_mc_hist(z_in, np.arange(0,8,0.05),
                          f"Redshift Distribution of C20 -- (MC_iter {n})",
                          int(np.count_nonzero((0 <= z_in) & (10 >= z_in))),
                          plot_field + f"run_{n}")

        if plot_zrange != "":
            # Zoomed histogram (bins fixed to 2 <= z < 3); the count uses the requested z_range
            _save_mc_hist(z_in, np.arange(2,3,0.01),
                          f"Redshift Distribution of Field -- (MC_iter {n})",
                          int(np.count_nonzero((z_range[0] <= z_in) & (z_range[1] >= z_in))),
                          plot_zrange + f"run_{n}")

    if new_zs:
        new_zs = np.array(new_zs)
    else:
        # No iterations were run: keep the (niter, len(zs)) layout so the outputs stay well-formed
        new_zs = np.empty((0, len(zs)))

    ## Keep only galaxies that fell within correct z-range at least once ###
    z_bool = ((z_range[0]< new_zs) & (new_zs < z_range[1])).any(axis=0)
    good_idxs = np.where(z_bool)[0]     # Where the condition is met

    return good_idxs, new_zs.transpose()


def _save_mc_hist(z_vals, bins, title, n_gal, out_path):
    """Save a histogram of one MC realization to out_path, creating its directory if needed."""
    fig, ax = plt.subplots()
    ax.hist(z_vals, bins=bins)
    ax.set_title(title)
    ax.text(0.7, 0.9, f"# of Galaxies = {n_gal}", fontsize=7,
            bbox=dict(boxstyle='round', fc = "white", ec='k', alpha=0.5), transform=ax.transAxes)
    ax.set_xlabel("z")
    ax.set_ylabel("N")

    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)     # Also creates missing parent directories
    fig.savefig(out_path)
    plt.close(fig)      # Close this figure explicitly so figures do not pile up over the iterations

In [4]:
def my_PDF(xs, l68, u68):
    """
    PDFs to draw the new redshifts from. Skewed-normal based on the confidence interval from COSMOS2020

    INPUTS:
        - xs (array)    = Location of each skew-normal (callers pass the median redshift)
        - l68 (array)   = Width of the 68% confidence interval below xs. It enters the scale
                          directly, so it must be a distance from xs, not an absolute redshift.
        - u68 (array)   = Width of the 68% confidence interval above xs (same convention as l68)
    OUTPUTS:
        - (array)   = New redshift values, one draw per entry. Entries whose scale or skewness
                      cannot be computed (NaN or non-finite) are returned as NaN. Negative draws
                      are NOT filtered here; they are returned unchanged.
    """
    l68 = np.asarray(l68)
    u68 = np.asarray(u68)

    # NaN inputs and negative square-root arguments are expected for some galaxies and are
    # handled below, so the corresponding floating-point warnings are silenced here.
    with np.errstate(invalid="ignore", divide="ignore"):
        omega = np.sqrt((l68**2 + u68**2)/2)        # Scale of skew-normal
        alpha = ((np.pi/2)**0.5 * (u68-l68)) / np.sqrt(2*l68**2 + 2*u68**2 - np.pi*(u68-l68)**2/2)  # Skewness

    # skewnorm.rvs rejects the whole call if any shape/scale is invalid, so flag those entries
    bad = ~(np.isfinite(omega) & np.isfinite(alpha))

    # Replace the unusable parameters with 1 temporarily for the calculation
    alpha = np.where(bad, 1.0, alpha)
    omega = np.where(bad, 1.0, omega)

    z_vals = skewnorm.rvs(a=alpha, loc=xs, scale=omega) # Find new zs based on skew-normal
    return np.where(bad, np.nan, z_vals)  # Draws made with placeholder parameters become NaN

----
---
---

# MC C20 Photometry

In [5]:
## READ IN FILE ##
cosmos_file = fits.open(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/COSMOS2020_CLASSIC_R1_v2.0.fits")
c20p = cosmos_file[1].data      # The catalog table is read from HDU 1

## FIND BAD GALAXIES ##
# A galaxy is "bad" when lephare gives no redshift or is missing either edge of the 68% interval
no_pz = np.isnan(c20p["lp_zPDF"])
no_pz |= np.isnan(c20p["lp_zPDF_l68"])
no_pz |= np.isnan(c20p["lp_zPDF_u68"])
bad_ids = np.flatnonzero(no_pz)

print(f"Number of galaxies = {len(c20p)}")
print(f"Number of bad galaxies = {len(bad_ids)}")

## INSERT TEMP DATA IN BAD IDs ##
# NOTE: this overwrites the NaNs of the in-memory catalog; afterwards bad_ids is the only
# record of which rows hold placeholder values.
for col_name, placeholder in (("lp_zPDF", 2), ("lp_zPDF_l68", 1.9), ("lp_zPDF_u68", 2.1)):
    c20p[col_name][bad_ids] = placeholder

Number of galaxies = 1720700
Number of bad galaxies = 19258


In [6]:
#### RUN THE MC ####
# Four independent runs of 250 iterations each; every run is written to its own .npy file.
# ========================================================
niter = 250             # Number of iterations per run
z_range = [2,3]         # Redshift range of interest

# Inputs shared by every run: median photo-z and the distances from it to the 68% bounds
phot_med = c20p["lp_zPDF"]
phot_l68 = phot_med - c20p["lp_zPDF_l68"]
phot_u68 = c20p["lp_zPDF_u68"] - phot_med

# Output layout: the catalog's first column followed by one float column per iteration
dtypes = [c20p.dtype.descr[0]] + [(f"MC_iter{n}", ">f8") for n in range(niter)]
# ========================================================

for run in range(4):
    plot_field = f"./MC_iterations/c20p_total_{run}/"
    plot_zrange = f"./MC_iterations/c20p_Hyper_{run}/"

    ## MC ##
    # Quality flags are all zero for photo-z's, so every redshift is redrawn on every iteration
    phot_ids, new_pzs = MCz(niter, phot_med, np.zeros(len(c20p)), z_range, my_PDF, plot_field,
                        plot_zrange, verbose=True, xs = phot_med, l68 = phot_l68, u68 = phot_u68)

    ## Update bad galaxies ##
    new_pzs[bad_ids] = -99      # Rows that were given placeholder p(z) parameters

    ## WRITE TO RESULT FILE ##
    write_arr = np.zeros(shape=(len(c20p)), dtype=dtypes)
    write_arr["ID"] = c20p["ID"]
    for n, mc_column in enumerate(new_pzs.T):
        write_arr[f"MC_iter{n}"] = mc_column

    np.save(rf"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20_MC_250_{run}.npy", write_arr)

0
50
100
150
200


---

# MC C20 Spectra

In [11]:
## LOAD THE SPECTRA FILE ##
specz_cat = np.loadtxt("./Data/master_specz_COSMOS_BF_v4b.cat", dtype=object)   # Load in the data

# Fix up the formatting for the spec data-file:
# every column that can be read as numbers becomes float32, the rest is left as loaded.
new_array = []
for idx in range(specz_cat.shape[1]):
    try:
        col = specz_cat[:,idx].astype(np.float32)
    except (ValueError, TypeError):     # Non-numeric column; only conversion failures are expected here
        col = specz_cat[:,idx]
    new_array.append(col)

# Columns were collected one by one, so transpose back to (n_spectra, n_columns)
c20s = np.array(new_array, dtype=object)
c20s = np.transpose(c20s)

miss_spec = np.where(c20s[:,0] == -99)[0]   # spectra not in the cosmos catalog

print(f"Number of C20 spectra: {c20s.shape[0]}")
print(f"Number of missing spectra: {len(miss_spec)}")

Number of C20 spectra: 42776
Number of missing spectra: 2562


In [12]:
## Pack parameters of spectra from cosmos catalog ##
# For every spectrum collect [median, lower 68% bound, upper 68% bound] of the matched
# catalog p(z). Unusable entries get temporary values and are recorded in bad_spec.

# Catalog rows whose p(z) was missing. Their NaNs were overwritten with placeholder values when
# the catalog was loaded, so the NaN test alone can no longer detect them.
c20_bad_rows = set(np.asarray(bad_ids).tolist())

# Look the catalog columns up once instead of once per spectrum
pz_med = c20p["lp_zPDF"]
pz_l68 = c20p["lp_zPDF_l68"]
pz_u68 = c20p["lp_zPDF_u68"]

bad_spec = [] # where parameters have a nan
gal_params = []
for s_id, c_id in enumerate(c20s[:,0].astype(int)):

    if c_id == -99:     # Not a cosmos object
        bad_spec.append(s_id)
        gal_params.append([2, 1.9, 2.1])    # make up temporary parameters
        continue

    # It is a cosmos object; the catalog row is taken to be ID-1
    row = int(c_id) - 1
    med = pz_med[row]
    l68 = pz_l68[row]
    u68 = pz_u68[row]

    if (row in c20_bad_rows) or np.isnan(med) or np.isnan(l68) or np.isnan(u68):   # no usable p(z)
        bad_spec.append(s_id)
        gal_params.append([2, 1.9, 2.1])    # make up temporary parameters
    else:
        gal_params.append([med, l68, u68])

gal_params = np.array(gal_params)
bad_spec = np.array(bad_spec, dtype=int)    # integer dtype so it is a valid index even when empty
print(f"Number of bad p(z)'s for specta: {len(bad_spec)}")

Number of bad p(z)'s for specta: 2562


In [13]:
#### RUN THE MC ####
# ========================================================
# ========================================================
niter = 1000      # Number of iterations

z_range = [2,3]         # Redshift range of interest
plot_field = "./MC_iterations/c20s_total/"
plot_zrange = "./MC_iterations/c20s_Hyper/"

spec_z = c20s[:,11]         # original spec-z
spec_med = gal_params[:,0]  # parameters for the p(z)
# gal_params holds the absolute 68% bounds, but my_PDF uses l68/u68 as distances from the
# median (they set the scale of the skew-normal). Convert them the same way the photometry
# run does before handing them over.
spec_l68 = spec_med - gal_params[:,1]
spec_u68 = gal_params[:,2] - spec_med

# ========================================================
# ========================================================
# ========================================================

## MC ##
# Spec-z's are kept or redrawn according to the quality flags in column 13
spec_ids, new_szs = MCz(niter, spec_z, c20s[:,13], z_range, my_PDF, plot_field,
                    plot_zrange, verbose=True, xs = spec_med, l68 = spec_l68, u68 = spec_u68)

## Update bad galaxies ##
if len(bad_spec):       # Nothing to overwrite when every spectrum has a usable p(z)
    new_szs[bad_spec] = -99

## WRITE TO RESULT FILE ##

# Update dtypes: the catalog's first column followed by one float column per iteration
dtypes = [c20p.dtype.descr[0]] + [(f"MC_iter{n}", ">f8") for n in range(niter)]

# Make array to fill
write_arr = np.zeros(shape=(len(c20s)), dtype=dtypes)

write_arr["ID"] = c20s[:,0]
for n in range(niter):
    write_arr[f"MC_iter{n}"] = new_szs[:,n]

np.save(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/C20_spec_MC_1000.npy", write_arr)

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


---

# MC Grizli Spectra

In [3]:
## LOAD THE SPECTRA FILE ##
griz_cat = np.loadtxt("./Data/HST_Hyp.v1.1.cat", skiprows=1, usecols=range(15), dtype=object)   # Load in the data

# Fix up the formatting for the spec data-file:
# every column that can be read as numbers becomes float32, the rest is left as loaded.
new_array = []
for idx in range(griz_cat.shape[1]):
    try:
        col = griz_cat[:,idx].astype(np.float32)
    except (ValueError, TypeError):     # Non-numeric column; only conversion failures are expected here
        col = griz_cat[:,idx]
    new_array.append(col)

# Columns were collected one by one, so transpose back to (n_spectra, n_columns)
griz = np.array(new_array, dtype=object)
griz = np.transpose(griz)

miss_griz = np.where(griz[:,4] == -99)[0]   # spectra not in the cosmos catalog

print(f"Number of C20 spectra: {griz.shape[0]}")
print(f"Number of missing spectra: {len(miss_griz)}")

Number of C20 spectra: 12764
Number of missing spectra: 53


In [28]:
## Pack parameters of spectra from cosmos catalog ##
# For every grizli spectrum collect [median, lower 68% bound, upper 68% bound] of the matched
# catalog p(z). Unusable entries get temporary values and are recorded in bad_griz.

# Catalog rows whose p(z) was missing. Their NaNs were overwritten with placeholder values when
# the catalog was loaded, so the NaN test alone can no longer detect them.
c20_bad_rows = set(np.asarray(bad_ids).tolist())

# Look the catalog columns up once instead of once per spectrum
pz_med = c20p["lp_zPDF"]
pz_l68 = c20p["lp_zPDF_l68"]
pz_u68 = c20p["lp_zPDF_u68"]

bad_griz = [] # where parameters have a nan
griz_params = []
for g_id, c_id in enumerate(griz[:,4].astype(int)):

    if c_id == -99:     # Not a cosmos object
        bad_griz.append(g_id)
        griz_params.append([2, 1.9, 2.1])    # make up temporary parameters
        continue

    # It is a cosmos object; the catalog row is taken to be ID-1
    row = int(c_id) - 1
    med = pz_med[row]
    l68 = pz_l68[row]
    u68 = pz_u68[row]

    if (row in c20_bad_rows) or np.isnan(med) or np.isnan(l68) or np.isnan(u68):   # no usable p(z)
        bad_griz.append(g_id)
        griz_params.append([2, 1.9, 2.1])    # make up temporary parameters
    else:
        griz_params.append([med, l68, u68])

griz_params = np.array(griz_params)
bad_griz = np.array(bad_griz, dtype=int)    # integer dtype so it is a valid index even when empty
print(f"Number of bad p(z)'s for specta: {len(bad_griz)}")

Number of bad p(z)'s for specta: 53


In [29]:
#### RUN THE MC ####
# ========================================================
# ========================================================
niter = 1000      # Number of iterations

z_range = [2,3]         # Redshift range of interest
plot_field = "./MC_iterations/griz_total/"
plot_zrange = "./MC_iterations/griz_Hyper/"

griz_z = griz[:,12]         # original spec-z
griz_med = griz_params[:,0]  # parameters for the p(z)
# griz_params holds the absolute 68% bounds, but my_PDF uses l68/u68 as distances from the
# median (they set the scale of the skew-normal). Convert them the same way the photometry
# run does before handing them over.
griz_l68 = griz_med - griz_params[:,1]
griz_u68 = griz_params[:,2] - griz_med

# ========================================================
# ========================================================
# ========================================================

## MC ##
# The p(z) parameters must be the grizli ones: the draws are matched to griz_z by position,
# so parameters from another catalog would assign each object someone else's p(z).
griz_ids, new_gzs = MCz(niter, griz_z, griz[:,-1], z_range, my_PDF, plot_field,
                    plot_zrange, verbose=True, xs = griz_med, l68 = griz_l68, u68 = griz_u68)

## Update bad galaxies ##
if len(bad_griz):       # Nothing to overwrite when every spectrum has a usable p(z)
    new_gzs[bad_griz] = -99

## WRITE TO RESULT FILE ##

# Update dtypes: the catalog's first column followed by one float column per iteration
dtypes = [c20p.dtype.descr[0]] + [(f"MC_iter{n}", ">f8") for n in range(niter)]

# Make array to fill
write_arr = np.zeros(shape=(len(griz)), dtype=dtypes)

write_arr["ID"] = griz[:,0]
for n in range(niter):
    write_arr[f"MC_iter{n}"] = new_gzs[:,n]

np.save(r"C:/Users/sikor/OneDrive/Desktop/BigData/COSMOS2020/grizli_MC_1000.npy", write_arr)

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


---
---
---