In [None]:
from astropy.cosmology import Planck18
from astropy.coordinates import SkyCoord
import astropy.units as u

from astropy.table import Table, Column
import pandas as pd
import os
import glob
import random

In [None]:
# Read the QSO clustering data catalogues (one NGC file, one SGC file) as astropy Tables.
# Original files are found here: https://data.desi.lbl.gov/public/dr1/survey/catalogs/dr1/LSS/iron/LSScats/v1.5/
lss_data_N = Table.read("data/QSO_NGC_clustering.dat.fits")
lss_data_S = Table.read("data/QSO_SGC_clustering.dat.fits")

# For data file:

In [None]:
# Collect the names of columns whose shape has at most one dimension; only
# these are handed to Table.to_pandas() below
names_N = list(filter(lambda col: len(lss_data_N[col].shape) <= 1, lss_data_N.colnames))
names_S = list(filter(lambda col: len(lss_data_S[col].shape) <= 1, lss_data_S.colnames))

# Convert the selected columns to pandas and tag every row with its source file
df_N = lss_data_N[names_N].to_pandas().assign(ORIGIN_FILE="NGC")
df_S = lss_data_S[names_S].to_pandas().assign(ORIGIN_FILE="SGC")

# Stack both subsets into a single frame with a fresh index. The redshift
# column 'Z' is renamed to 'REDSHIFT', which frees the name 'Z' for the
# Cartesian z coordinate added in a later cell.
df_total = pd.concat([df_N, df_S], ignore_index=True).rename(columns={'Z': 'REDSHIFT'})

### Calculate comoving distance and Cartesian coordinates

In [None]:
# Cosmology used by the redshift -> comoving-distance conversions in the cells below
cosmo = Planck18 # define cosmology

In [None]:
# Sky position and redshift of every object as plain arrays
ra, dec, redshift = (df_total[col].values for col in ('RA', 'DEC', 'REDSHIFT'))

# Comoving distance in Mpc under the chosen cosmology, stored with the catalogue
d_comov = cosmo.comoving_distance(redshift).to_value(u.Mpc)
df_total['COMOVING_DISTANCE'] = d_comov

# (RA, Dec, comoving distance) as a 3-D position in the ICRS frame
coords = SkyCoord(
    ra=ra * u.deg,
    dec=dec * u.deg,
    distance=d_comov * u.Mpc,
    frame='icrs',
)

# Cartesian components as bare arrays
x, y, z = (getattr(coords.cartesian, axis).value for axis in 'xyz')

# Cartesian components in Mpc, stored with the catalogue.
# NOTE: from here on 'Z' is the Cartesian z coordinate; the redshift lives in 'REDSHIFT'.
df_total['X'] = coords.cartesian.x.to_value(u.Mpc)
df_total['Y'] = coords.cartesian.y.to_value(u.Mpc)
df_total['Z'] = coords.cartesian.z.to_value(u.Mpc)

### Save files

In [None]:
# Output directory for the ECSV files written below; exist_ok=True makes re-running this cell harmless
os.makedirs("create_files", exist_ok=True)

In [None]:
# Split the combined catalogue back into its NGC and SGC subsets
df_ngc = df_total.loc[df_total["ORIGIN_FILE"].eq("NGC")]
df_sgc = df_total.loc[df_total["ORIGIN_FILE"].eq("SGC")]

def save_ecsv(df, name):
    """Write the TARGETID and Cartesian position columns of ``df`` to an ECSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing the columns ``TARGETID``, ``X``, ``Y`` and ``Z``.
    name : str
        Label inserted into the output file name,
        ``create_files/QSO_{name}_clustering_data.ecsv``.

    Notes
    -----
    The X, Y and Z columns are written with unit Mpc. An existing file at the
    same path is overwritten, and the path of the written file is printed.
    """
    position_axes = ("X", "Y", "Z")

    # The identifier column carries no unit; the three position columns are in Mpc
    columns = [Column(df["TARGETID"].values)]
    columns += [Column(df[axis].values, unit=u.Mpc) for axis in position_axes]

    filename = f"create_files/QSO_{name}_clustering_data.ecsv"
    Table(columns, names=("TARGETID",) + position_axes).write(
        filename, format="ascii.ecsv", overwrite=True
    )
    print(f"Saved file: {filename}")

# Write one data catalogue per subset: create_files/QSO_NGC_clustering_data.ecsv
# and create_files/QSO_SGC_clustering_data.ecsv
save_ecsv(df_ngc, "NGC")
save_ecsv(df_sgc, "SGC")

# For random file:

In [None]:
# Read the "_0" random catalogues for NGC and SGC as astropy Tables
ran_file_NGC = Table.read("data/QSO_NGC_0_clustering.ran.fits")
ran_file_SGC = Table.read("data/QSO_SGC_0_clustering.ran.fits")

In [None]:
def save_random(ran_table, data_df, label, seed=42):
    """Down-sample a random catalogue to the data size and save Cartesian positions.

    A random subset of ``ran_table`` with as many rows as ``data_df`` is drawn,
    its (RA, DEC, Z) values are converted to Cartesian X, Y, Z in Mpc using the
    module-level ``cosmo`` cosmology, and the result is written to
    ``create_files/QSO_{label}_clustering_random.ecsv`` (overwriting any
    existing file). The path of the written file is printed.

    Parameters
    ----------
    ran_table : astropy.table.Table
        Random catalogue providing the columns ``TARGETID``, ``RA``, ``DEC``
        and ``Z`` (redshift).
    data_df : pandas.DataFrame
        Data catalogue; only its length is used, as the number of random rows
        to keep.
    label : str
        Label inserted into the output file name.
    seed : int, optional
        Seed passed to ``DataFrame.sample`` so the selection is reproducible.
        Defaults to 42.

    Raises
    ------
    ValueError
        If ``data_df`` has more rows than ``ran_table``; sampling is done
        without replacement.
    """
    # Convert only the columns that are actually used. Converting the whole
    # table is wasteful for large random catalogues and, unlike the data path,
    # would fail if the table contained a multidimensional column.
    needed_columns = ["TARGETID", "RA", "DEC", "Z"]
    ran_df = ran_table[needed_columns].to_pandas()

    # Draw as many random rows as there are data rows
    n_rows = len(data_df)
    if n_rows > len(ran_df):
        raise ValueError(
            f"Cannot sample {n_rows} rows from a random catalogue "
            f"with only {len(ran_df)} rows ({label})"
        )
    ran_df_sampled = ran_df.sample(n=n_rows, random_state=seed).reset_index(drop=True)

    # Comoving distance of every sampled random point (here 'Z' is the redshift)
    d_comov = cosmo.comoving_distance(ran_df_sampled["Z"].values).to(u.Mpc)

    # Same frame as the data catalogue, stated explicitly for consistency
    coords = SkyCoord(
        ra=ran_df_sampled["RA"].values * u.deg,
        dec=ran_df_sampled["DEC"].values * u.deg,
        distance=d_comov,
        frame="icrs",
    )
    cartesian = coords.cartesian

    # Plain arrays plus an explicit unit, matching how save_ecsv builds its columns
    output_table = Table([
        Column(ran_df_sampled["TARGETID"].values, name="TARGETID"),
        Column(cartesian.x.to_value(u.Mpc), name="X", unit=u.Mpc),
        Column(cartesian.y.to_value(u.Mpc), name="Y", unit=u.Mpc),
        Column(cartesian.z.to_value(u.Mpc), name="Z", unit=u.Mpc),
    ])

    filename = f"create_files/QSO_{label}_clustering_random.ecsv"
    output_table.write(filename, format="ascii.ecsv", overwrite=True)
    print(f"Saved file: {filename}")

In [None]:
# Write one random catalogue per subset, each down-sampled to the number of rows
# of the corresponding data catalogue
save_random(ran_file_NGC, df_ngc, "NGC")
save_random(ran_file_SGC, df_sgc, "SGC")