In [2]:
from astropy.cosmology import Planck18
from astropy.coordinates import SkyCoord
import astropy.units as u

from astropy.table import Table, Column
import pandas as pd
import os
import glob
import random

In [3]:
# Source of the catalogues: https://data.desi.lbl.gov/public/edr/vac/edr/lss/v2.0/LSScats/clustering/
# Read the North and South QSO clustering catalogues in one pass.
lss_data_N, lss_data_S = map(
    Table.read,
    ("data/QSO_N_clustering.dat.fits", "data/QSO_S_clustering.dat.fits"),
)

In [4]:
# Keep only columns holding at most one value per row (ndim < 2); these are the
# columns handed to `to_pandas()` below.
names_N = [col for col in lss_data_N.colnames if lss_data_N[col].ndim < 2]
names_S = [col for col in lss_data_S.colnames if lss_data_S[col].ndim < 2]

# One DataFrame per hemisphere, built from the retained columns
df_N = lss_data_N[names_N].to_pandas()
df_S = lss_data_S[names_S].to_pandas()

# Stack North on top of South with a fresh 0..n-1 index and expose 'Z' as 'REDSHIFT'
df_total = pd.concat([df_N, df_S], ignore_index=True).rename(columns={'Z': 'REDSHIFT'})

In [5]:
# Split the combined catalogue into one independent DataFrame per rosette,
# stored in a dict keyed by the ROSETTE_NUMBER value (inserted in ascending order).
df = {}
rosette_column = df_total['ROSETTE_NUMBER']
for rosette in sorted(rosette_column.unique()):
    in_rosette = rosette_column == rosette
    df[rosette] = df_total.loc[in_rosette].copy()

# The cells below build the random catalogues.

In [8]:
# Cosmology (astropy's built-in Planck18) used later in the notebook to convert
# redshifts into comoving distances.
cosmo = Planck18 # cosmology for redshift -> comoving distance

In [14]:
# Collect the random-catalogue files of each hemisphere, sorted by path so the
# listing order does not depend on the filesystem.
ran_files_N, ran_files_S = (
    sorted(glob.glob(pattern))
    for pattern in (
        "data/QSO_N_*_clustering.ran.fits",
        "data/QSO_S_*_clustering.ran.fits",
    )
)

In [11]:
# Distinct ROSETTE_NUMBER values present in each hemisphere's data catalogue;
# used to decide which hemisphere's random files belong to a given rosette.
rosettas_N = {rosette_id for rosette_id in lss_data_N['ROSETTE_NUMBER']}
rosettas_S = {rosette_id for rosette_id in lss_data_S['ROSETTE_NUMBER']}

In [None]:
# Build `n_randoms` random catalogues for every rosette:
#   1. pick a random file from the rosette's hemisphere (cycling through all
#      files before any file is reused),
#   2. keep only the random points belonging to that rosette,
#   3. subsample them to the size of the rosette's data catalogue,
#   4. convert (RA, DEC, Z) to comoving cartesian coordinates,
#   5. write TARGETID, X, Y, Z to an ECSV file.

# Number of random files per rosette
n_randoms = 100

# Create the output directory up front so the first write cannot fail on a
# fresh checkout.
os.makedirs("data_100_random", exist_ok=True)

# Dedicated seeded generator: makes the choice of random files reproducible
# (the row subsampling below is already seeded through `random_state=j`).
file_rng = random.Random(0)

# `df` is a dict keyed by ROSETTE_NUMBER, so iterate over its items instead of
# indexing it with range(len(df)), which only works when the keys are 0..N-1.
for rosetta, rosette_df in df.items():
    # Integer label for log messages and file names; identical to the former
    # positional index whenever the rosette numbers are exactly 0..N-1.
    i = int(rosetta)
    n_rows = len(rosette_df)

    # File selection by hemisphere (North takes precedence if a rosette number
    # appears in both catalogues).
    if rosetta in rosettas_N:
        ran_files = ran_files_N
        hemisphere = "N"
    elif rosetta in rosettas_S:
        ran_files = ran_files_S
        hemisphere = "S"
    else:
        # Without this branch `ran_files` would be unbound, or silently reused
        # from the previous rosette.
        raise ValueError(f"Rosette {rosetta} was not found in the N or S data catalogue")

    if not ran_files:
        raise FileNotFoundError(
            f"No random files found for hemisphere {hemisphere} (rosette {rosetta})"
        )

    used_ran = set()
    # ran_file -> rows of that file belonging to this rosette. Each file is read
    # and filtered once per rosette instead of once per draw.
    filtered_cache = {}

    for j in range(n_randoms):
        # If we have already used all the files, we allow them again
        if len(used_ran) == len(ran_files):
            used_ran = set()

        # Choose a .ran file that has not been used in the current cycle
        candidates = [f for f in ran_files if f not in used_ran]
        ran_file = file_rng.choice(candidates)
        used_ran.add(ran_file)

        print(f"→ df[{i}] random #{j+1} uses: {ran_file}")

        # Read and filter the random file (cached per rosette)
        if ran_file not in filtered_cache:
            ran_full_df = Table.read(ran_file).to_pandas()
            filtered_cache[ran_file] = ran_full_df[ran_full_df['ROSETTE_NUMBER'] == rosetta]
        ran_filtered_df = filtered_cache[ran_file]

        # Sampling is done without replacement, so the random catalogue must
        # hold at least as many points as the data catalogue.
        if len(ran_filtered_df) < n_rows:
            raise ValueError(
                f"{ran_file} has only {len(ran_filtered_df)} randoms for rosette "
                f"{rosetta}, but {n_rows} are required"
            )

        # Seeding with `j` keeps each of the n_randoms subsamples reproducible
        ran_df = ran_filtered_df.sample(n=n_rows, random_state=j).reset_index(drop=True)

        # Comoving distance and cartesian coordinates
        z_ran = ran_df['Z'].values
        d_comov = cosmo.comoving_distance(z_ran).to(u.Mpc)
        ran_df['COMOVING_DISTANCE'] = d_comov.value

        coords = SkyCoord(ra=ran_df['RA'].values * u.deg,
                          dec=ran_df['DEC'].values * u.deg,
                          distance=d_comov)

        # NOTE: the output column 'Z' is the cartesian z coordinate in Mpc,
        # not the redshift.
        x = Column(coords.cartesian.x.to(u.Mpc), name='X')
        y = Column(coords.cartesian.y.to(u.Mpc), name='Y')
        z = Column(coords.cartesian.z.to(u.Mpc), name='Z')
        targetid = Column(ran_df['TARGETID'], name='TARGETID')

        output_table = Table([targetid, x, y, z])
        filename = f"data_100_random/QSO_{i}_clustering_random_{j}.ecsv"
        output_table.write(filename, format='ascii.ecsv', overwrite=True)
        print(f"✔️ Saved file: {filename}")

→ df[0] random #1 uses: data\QSO_S_8_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_0.ecsv
→ df[0] random #2 uses: data\QSO_S_4_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_1.ecsv
→ df[0] random #3 uses: data\QSO_S_11_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_2.ecsv
→ df[0] random #4 uses: data\QSO_S_12_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_3.ecsv
→ df[0] random #5 uses: data\QSO_S_6_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_4.ecsv
→ df[0] random #6 uses: data\QSO_S_14_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_5.ecsv
→ df[0] random #7 uses: data\QSO_S_9_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_6.ecsv
→ df[0] random #8 uses: data\QSO_S_10_clustering.ran.fits
✔️ Saved file: data_100_random/QSO_0_clustering_random_7.ecsv
→ df[0] random #9 uses: data\QSO_S_1_cluster