In [2]:
from astropy.cosmology import Planck18
from astropy.coordinates import SkyCoord
import astropy.units as u

from astropy.table import Table, Column
import pandas as pd
import os
import glob
import random

In [3]:
# original files are found here https://data.desi.lbl.gov/public/dr1/survey/catalogs/dr1/LSS/iron/LSScats/v1.5/
lss_data_N = Table.read("data/QSO_NGC_clustering.dat.fits")

In [4]:
# Filter one-dimensional columns
names_N = [name for name in lss_data_N.colnames if len(lss_data_N[name].shape) <= 1]

# Convert to pandas DataFrame with only valid columns
df_N = lss_data_N[names_N].to_pandas()

df_N["ORIGIN_FILE"] = "NGC"

In [5]:
df_NGC_1 = df_N[
    (df_N['RA'] > 110) & (df_N['RA'] < 260) &
    (df_N['DEC'] > -10) & (df_N['DEC'] < 8) 
]

df_NGC_2 = df_N[
    (df_N['RA'] > 180) & (df_N['RA'] < 260) &
    (df_N['DEC'] > 30) & (df_N['DEC'] < 40) 
]

In [19]:
df_NGC_1.rename(columns={'Z': 'REDSHIFT'}, inplace=True)
df_NGC_2.rename(columns={'Z': 'REDSHIFT'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_NGC_1.rename(columns={'Z': 'REDSHIFT'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_NGC_2.rename(columns={'Z': 'REDSHIFT'}, inplace=True)


In [25]:
cosmo = Planck18 # define cosmology

def parameters_for_files(df):
    redshift = df['REDSHIFT'].values
    ra = df['RA'].values
    dec = df['DEC'].values

    # comoving distance
    d_comov = cosmo.comoving_distance(redshift).to(u.Mpc).value
    df['COMOVING_DISTANCE'] = d_comov

    coords = SkyCoord(ra=ra*u.deg, dec=dec*u.deg, distance=d_comov*u.Mpc, frame='icrs')

    # cartesian coordinates
    x = coords.cartesian.x.value
    y = coords.cartesian.y.value
    z = coords.cartesian.z.value 

    df['X'] = coords.cartesian.x.to(u.Mpc).value
    df['Y'] = coords.cartesian.y.to(u.Mpc).value
    df['Z'] = coords.cartesian.z.to(u.Mpc).value 
    return df

In [26]:
def save_ecsv(df, name):
    targetid = Column(df["TARGETID"].values)
    x = Column(df["X"].values, unit=u.Mpc)
    y = Column(df["Y"].values, unit=u.Mpc)
    z = Column(df["Z"].values, unit=u.Mpc)
    ra = Column(df["RA"].values, unit=u.deg)
    dec = Column(df["DEC"].values, unit=u.deg)
    redshift = Column(df["REDSHIFT"].values)

    table = Table([targetid, x, y, z, ra, dec, redshift],
                  names=("TARGETID", "X", "Y", "Z", "RA", "DEC", "REDSHIFT"))
    filename = f"create_files/QSO_{name}_clustering_data.ecsv"
    table.write(filename, format="ascii.ecsv", overwrite=True)
    print(f"Saved file: {filename}")

save_ecsv(parameters_for_files(df_NGC_1), "NGC1")
save_ecsv(parameters_for_files(df_NGC_2), "NGC2")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['COMOVING_DISTANCE'] = d_comov
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['X'] = coords.cartesian.x.to(u.Mpc).value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Y'] = coords.cartesian.y.to(u.Mpc).value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc

Saved file: create_files/QSO_NGC1_clustering_data.ecsv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['COMOVING_DISTANCE'] = d_comov
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['X'] = coords.cartesian.x.to(u.Mpc).value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Y'] = coords.cartesian.y.to(u.Mpc).value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc

Saved file: create_files/QSO_NGC2_clustering_data.ecsv


In [5]:
regions = {'NGC_1': df_NGC_1, 'NGC_2': df_NGC_2}

# For random file:

In [6]:
cosmo = Planck18 # define cosmology

In [11]:
ran_files_NGC = sorted(glob.glob("data/QSO_NGC_*_clustering.ran.fits"))

In [12]:
output_dir = "data_100_random"

In [14]:
file_use_count = {i: 0 for i in range(len(ran_files_NGC))}

for region_name, region_df in regions.items():
    used_indices = set()
    n_files = len(ran_files_NGC)
    n_rows = len(region_df)

    for j in range(100):
        if len(used_indices) == n_files:
            used_indices = set()

        available_indices = list(set(range(n_files)) - used_indices)
        file_index = random.choice(available_indices)
        used_indices.add(file_index)

        ran_table = Table.read(ran_files_NGC[file_index])
        ran_df = ran_table.to_pandas()

        ran_df_full = ran_table.to_pandas()

        if region_name == 'NGC_1':
            filtered_ran = ran_df_full[(ran_df_full['RA'] > 110) & (ran_df_full['RA'] < 260) &
                                    (ran_df_full['DEC'] > -10) & (ran_df_full['DEC'] < 8)]
        else:
            filtered_ran = ran_df_full[(ran_df_full['RA'] > 180) & (ran_df_full['RA'] < 260) &
                                    (ran_df_full['DEC'] > 30) & (ran_df_full['DEC'] < 40)]

        ran_df = filtered_ran.sample(n=n_rows, random_state=j).reset_index(drop=True)

        z_ran = ran_df['Z'].values
        d_comov = cosmo.comoving_distance(z_ran).to(u.Mpc)
        coords = SkyCoord(ra=ran_df['RA'].values * u.deg,
                          dec=ran_df['DEC'].values * u.deg,
                          distance=d_comov)


        output_table = Table({
            'TARGETID': ran_df['TARGETID'],
            'X': coords.cartesian.x.to(u.Mpc),
            'Y': coords.cartesian.y.to(u.Mpc),
            'Z': coords.cartesian.z.to(u.Mpc),
            'RA': ran_df['RA'],
            'DEC': ran_df['DEC'],
            'REDSHIFT': ran_df['Z'],
        })

        filename = f"{output_dir}/{region_name}_random_{j}.ecsv"
        output_table.write(filename, format='ascii.ecsv', overwrite=True)
        print(f"Saved file: {filename}")

Saved file: data_100_random/NGC_1_random_0.ecsv
Saved file: data_100_random/NGC_1_random_1.ecsv
Saved file: data_100_random/NGC_1_random_2.ecsv
Saved file: data_100_random/NGC_1_random_3.ecsv
Saved file: data_100_random/NGC_1_random_4.ecsv
Saved file: data_100_random/NGC_1_random_5.ecsv
Saved file: data_100_random/NGC_1_random_6.ecsv
Saved file: data_100_random/NGC_1_random_7.ecsv
Saved file: data_100_random/NGC_1_random_8.ecsv
Saved file: data_100_random/NGC_1_random_9.ecsv
Saved file: data_100_random/NGC_1_random_10.ecsv
Saved file: data_100_random/NGC_1_random_11.ecsv
Saved file: data_100_random/NGC_1_random_12.ecsv
Saved file: data_100_random/NGC_1_random_13.ecsv
Saved file: data_100_random/NGC_1_random_14.ecsv
Saved file: data_100_random/NGC_1_random_15.ecsv
Saved file: data_100_random/NGC_1_random_16.ecsv
Saved file: data_100_random/NGC_1_random_17.ecsv
Saved file: data_100_random/NGC_1_random_18.ecsv
Saved file: data_100_random/NGC_1_random_19.ecsv
Saved file: data_100_random/NG