In [24]:
from astropy.cosmology import Planck18
from astropy.coordinates import SkyCoord
import astropy.units as u

from astropy.table import Table, Column
import pandas as pd
import os
import glob
import random

In [2]:
# Read the N and S QSO clustering catalogues from the local data/ directory.
# The original files were downloaded from:
# https://data.desi.lbl.gov/public/edr/vac/edr/lss/v2.0/LSScats/clustering/
lss_data_N, lss_data_S = (
    Table.read("data/QSO_N_clustering.dat.fits"),
    Table.read("data/QSO_S_clustering.dat.fits"),
)

# For data file:

In [3]:
# Keep only columns holding one value per row (shape of rank 0 or 1);
# presumably needed because Table.to_pandas() rejects multidimensional columns.
names_N = [col for col in lss_data_N.colnames if len(lss_data_N[col].shape) < 2]
names_S = [col for col in lss_data_S.colnames if len(lss_data_S[col].shape) < 2]

# One pandas DataFrame per catalogue, restricted to the columns kept above
df_N, df_S = lss_data_N[names_N].to_pandas(), lss_data_S[names_S].to_pandas()

# Stack N on top of S with a fresh 0..n-1 index. The redshift column 'Z' is
# renamed to 'REDSHIFT' so the name 'Z' stays free for the Cartesian
# z-coordinate that is added to the frames later on.
df_total = (
    pd.concat([df_N, df_S], ignore_index=True)
    .rename(columns={'Z': 'REDSHIFT'})
)

In [4]:
# Split the combined catalogue into one independent DataFrame per rosette.
# The dict is keyed by the ROSETTE_NUMBER value, inserted in ascending order;
# .copy() detaches each piece so later column assignments do not touch df_total.
df = {
    rosette_id: df_total.loc[df_total['ROSETTE_NUMBER'] == rosette_id].copy()
    for rosette_id in sorted(df_total['ROSETTE_NUMBER'].unique())
}

### Calculate comoving distance and Cartesian coordinates

In [5]:
# Cosmology used by the redshift -> comoving-distance conversions in this notebook
cosmo = Planck18 # the Planck18 realization imported from astropy.cosmology

In [7]:
# Add the comoving distance and the Cartesian position (X, Y, Z) to every
# per-rosette DataFrame, in place.
#
# Iterate over the dict's values instead of range(len(df)): the keys of `df`
# are ROSETTE_NUMBER values, which are not guaranteed to be exactly
# 0..len(df)-1, so positional indexing could raise KeyError or skip a rosette.
for rosette_df in df.values():
    redshift = rosette_df['REDSHIFT'].values
    ra = rosette_df['RA'].values
    dec = rosette_df['DEC'].values

    # Comoving distance for the chosen cosmology, as plain floats in Mpc
    d_comov = cosmo.comoving_distance(redshift).to(u.Mpc).value
    rosette_df['COMOVING_DISTANCE'] = d_comov

    # RA/DEC are interpreted as degrees; the distance turns the sky position
    # into a full 3-D position.
    coords = SkyCoord(ra=ra*u.deg, dec=dec*u.deg, distance=d_comov*u.Mpc, frame='icrs')

    # Cartesian coordinates in Mpc; build the representation once and reuse it
    cart = coords.cartesian
    rosette_df['X'] = cart.x.to(u.Mpc).value
    rosette_df['Y'] = cart.y.to(u.Mpc).value
    rosette_df['Z'] = cart.z.to(u.Mpc).value

### Save files

In [34]:
# Make sure the output directory for the generated .ecsv files exists;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs("create_files", exist_ok=True)

In [13]:
# Write one ECSV file per rosette containing TARGETID and the Cartesian
# position (X, Y, Z) in Mpc.
#
# Iterate over df.items() instead of range(len(df)): the dict is keyed by
# ROSETTE_NUMBER, so positional indices only work when the keys happen to be
# exactly 0..len(df)-1.
for rosette, rosette_df in df.items():
    targetid = rosette_df['TARGETID']
    x = Column(rosette_df['X'], unit=u.Mpc)
    y = Column(rosette_df['Y'], unit=u.Mpc)
    z = Column(rosette_df['Z'], unit=u.Mpc)

    table = Table([targetid, x, y, z], names=('TARGETID', 'X', 'Y', 'Z'))

    # int() keeps the file name free of a decimal point should the key be a float
    filename = f"create_files/QSO_{int(rosette)}_clustering_data.ecsv"
    table.write(filename, format='ascii.ecsv', overwrite=True)

    print(f"Saved file: {filename}")

Saved file: create_files/QSO_0_clustering_data.ecsv
Saved file: create_files/QSO_1_clustering_data.ecsv
Saved file: create_files/QSO_2_clustering_data.ecsv
Saved file: create_files/QSO_3_clustering_data.ecsv
Saved file: create_files/QSO_4_clustering_data.ecsv
Saved file: create_files/QSO_5_clustering_data.ecsv
Saved file: create_files/QSO_6_clustering_data.ecsv
Saved file: create_files/QSO_7_clustering_data.ecsv
Saved file: create_files/QSO_8_clustering_data.ecsv
Saved file: create_files/QSO_9_clustering_data.ecsv
Saved file: create_files/QSO_10_clustering_data.ecsv
Saved file: create_files/QSO_11_clustering_data.ecsv
Saved file: create_files/QSO_12_clustering_data.ecsv
Saved file: create_files/QSO_13_clustering_data.ecsv
Saved file: create_files/QSO_14_clustering_data.ecsv
Saved file: create_files/QSO_15_clustering_data.ecsv
Saved file: create_files/QSO_16_clustering_data.ecsv
Saved file: create_files/QSO_17_clustering_data.ecsv
Saved file: create_files/QSO_18_clustering_data.ecsv
Sav

# For random file:

In [37]:
# Collect the N and S random-catalogue files (.ran.fits) from the data directory;
# sorted() makes the file order deterministic.
ran_files_N = sorted(glob.glob("data/QSO_N_*_clustering.ran.fits"))
ran_files_S = sorted(glob.glob("data/QSO_S_*_clustering.ran.fits"))

In [23]:
# Distinct ROSETTE_NUMBER values present in each hemisphere's data catalogue;
# used further down to decide whether a rosette draws from the N or S random files.
rosettas_N = set(lss_data_N['ROSETTE_NUMBER'])
rosettas_S = set(lss_data_S['ROSETTE_NUMBER'])

In [None]:
# Build one random-catalogue ECSV file (Cartesian X, Y, Z in Mpc) per rosette.
#
# A dedicated, seeded generator makes the choice of .ran file reproducible on
# a fresh "Restart & Run All"; the same seed is used for the row sampling.
RANDOM_SEED = 42
rng = random.Random(RANDOM_SEED)

# Maintain the set of already-used .ran files per hemisphere to avoid repetition
used_ran_N = set()
used_ran_S = set()

# Process each per-rosette DataFrame. Iterate over df.items() because the dict
# is keyed by ROSETTE_NUMBER, which need not be exactly 0..len(df)-1.
for key, rosette_df in df.items():
    i = int(key)
    rosetta = rosette_df['ROSETTE_NUMBER'].iloc[0]
    n_rows = len(rosette_df)

    # Choose hemisphere. Fail loudly if the rosette belongs to neither one,
    # instead of silently reusing the file picked in the previous iteration.
    if rosetta in rosettas_N:
        pool, used = ran_files_N, used_ran_N
    elif rosetta in rosettas_S:
        pool, used = ran_files_S, used_ran_S
    else:
        raise ValueError(f"Rosette {rosetta} is in neither the N nor the S catalogue")

    candidates = [f for f in pool if f not in used]
    if not candidates:
        raise RuntimeError(f"No unused random file left for rosette {rosetta}")
    ran_file = rng.choice(candidates)
    used.add(ran_file)

    print(f"→ df[{i}] uses random file: {ran_file}")

    # Read the random file; keep only one-dimensional columns (same filter as
    # for the data catalogues) so the conversion to pandas cannot fail on them.
    ran_table = Table.read(ran_file)
    flat_cols = [name for name in ran_table.colnames if len(ran_table[name].shape) <= 1]

    if n_rows > len(ran_table):
        raise ValueError(
            f"{ran_file} has {len(ran_table)} rows, fewer than the {n_rows} needed for rosette {rosetta}"
        )

    # NOTE(review): rows are sampled from the whole random file, without any
    # restriction to this rosette — confirm that this is the intended selection.
    ran_df = (
        ran_table[flat_cols]
        .to_pandas()
        .sample(n=n_rows, random_state=RANDOM_SEED)  # same length as the data file
        .reset_index(drop=True)
    )

    # 'Z' is the redshift column of the random catalogue here
    z_ran = ran_df['Z'].values
    d_comov = cosmo.comoving_distance(z_ran).to(u.Mpc)
    ran_df['COMOVING_DISTANCE'] = d_comov.value

    coords = SkyCoord(ra=ran_df['RA'].values * u.deg,
                      dec=ran_df['DEC'].values * u.deg,
                      distance=d_comov)

    # Cartesian coordinates in Mpc; build the representation once and reuse it
    cart = coords.cartesian
    x = Column(cart.x.to(u.Mpc), name='X')
    y = Column(cart.y.to(u.Mpc), name='Y')
    z = Column(cart.z.to(u.Mpc), name='Z')

    output_table = Table([x, y, z])
    filename = f"create_files/QSO_{i}_clustering_random.ecsv"
    output_table.write(filename, format='ascii.ecsv', overwrite=True)
    print(f"Saved file: {filename}")

→ df[0] uses random file: data\QSO_S_10_clustering.ran.fits
Saved file: create_files/QSO_0_clustering_random.ecsv
→ df[1] uses random file: data\QSO_S_11_clustering.ran.fits
Saved file: create_files/QSO_1_clustering_random.ecsv
→ df[2] uses random file: data\QSO_S_4_clustering.ran.fits
Saved file: create_files/QSO_2_clustering_random.ecsv
→ df[3] uses random file: data\QSO_N_14_clustering.ran.fits
Saved file: create_files/QSO_3_clustering_random.ecsv
→ df[4] uses random file: data\QSO_S_1_clustering.ran.fits
Saved file: create_files/QSO_4_clustering_random.ecsv
→ df[5] uses random file: data\QSO_S_13_clustering.ran.fits
Saved file: create_files/QSO_5_clustering_random.ecsv
→ df[6] uses random file: data\QSO_N_10_clustering.ran.fits
Saved file: create_files/QSO_6_clustering_random.ecsv
→ df[7] uses random file: data\QSO_N_16_clustering.ran.fits
Saved file: create_files/QSO_7_clustering_random.ecsv
→ df[8] uses random file: data\QSO_S_8_clustering.ran.fits
Saved file: create_files/QSO_8_