In [1]:
import numpy as np
from astropy.table import Table
from scipy.spatial import Delaunay
from itertools import combinations
import pandas as pd
import os

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.tri as mtri
import matplotlib.cm as cm
# Global figure defaults: high-resolution output, with all text typeset through LaTeX.
matplotlib.rcParams.update({'figure.dpi': 360, 'text.usetex': True})
# Prepend the TeX binary directory to PATH so the LaTeX executables can be located
# (the path looks like a macOS TeX install location — adjust on other systems).
os.environ['PATH'] = ':'.join(['/Library/TeX/texbin', os.environ['PATH']])

### Data

In [2]:
def _read_ecsv(path):
    """Read one ECSV catalog from ``path`` and return it as a pandas DataFrame."""
    return Table.read(path, format="ascii.ecsv").to_pandas()


# One "data" and one "random" catalog per file-name tag (NGC / SGC).
data_ngc = _read_ecsv("create_files/QSO_NGC_clustering_data.ecsv")
rand_ngc = _read_ecsv("create_files/QSO_NGC_clustering_random.ecsv")

data_sgc = _read_ecsv("create_files/QSO_SGC_clustering_data.ecsv")
rand_sgc = _read_ecsv("create_files/QSO_SGC_clustering_random.ecsv")

In [3]:
# Tag every row with its origin: RAN is False for the data catalogs and True for the randoms.
for observed, randoms in ((data_ngc, rand_ngc), (data_sgc, rand_sgc)):
    observed['RAN'] = False
    randoms['RAN'] = True

In [4]:
# Stack each data catalog on top of its random catalog; ignore_index renumbers
# the rows of every combined frame from 0.
df_north, df_south = (
    pd.concat(list(pair), ignore_index=True)
    for pair in ((data_ngc, rand_ngc), (data_sgc, rand_sgc))
)

In [7]:
from astropy.coordinates import CartesianRepresentation, ICRS
import astropy.units as u

In [None]:
def ra_dec(df):
    """Return a copy of ``df`` with ``RA`` and ``DEC`` columns (degrees) added.

    The ``X``, ``Y`` and ``Z`` columns are tagged as Mpc, wrapped in a Cartesian
    representation and placed in an ICRS frame; the frame's right ascension and
    declination are then written out in degrees. The input frame is left untouched.
    """
    out = df.copy()

    # Attach the length unit to each axis before building the representation.
    x, y, z = (out[axis].values * u.Mpc for axis in ('X', 'Y', 'Z'))
    sky = ICRS(CartesianRepresentation(x, y, z))

    out['RA'] = sky.ra.deg
    out['DEC'] = sky.dec.deg
    return out


In [9]:
# Order is [north, south]; later cells index this list by position.
df_hemispheres = [ra_dec(hemisphere) for hemisphere in (df_north, df_south)]

### Delaunay triangulation

In [None]:
# Build two triangulations per hemisphere, stored in the same order as df_hemispheres:
#   tri_3d_list - scipy Delaunay tessellation of the (X, Y, Z) positions
#   tri_2d_list - matplotlib triangulation of the (X, Y) projection, drawn by the triplot cell
tri_3d_list = []
tri_2d_list = []

for df in df_hemispheres:
    # The previously built (and never used) `coords_2d` array is dropped: mtri.Triangulation
    # takes the X and Y columns directly.
    tri_3d_list.append(Delaunay(df[['X', 'Y', 'Z']].values))
    tri_2d_list.append(mtri.Triangulation(df['X'], df['Y']))


In [None]:
# Candidate zoom window in the X-Y plane (same units as the axes). It is currently not
# applied: to zoom, restrict each frame to the window and call
# ax.set_xlim(x_min, x_max) / ax.set_ylim(y_min, y_max) on every panel.
x_min, x_max = -5800, -5400
y_min, y_max = -80, 0

fig, axes = plt.subplots(1, 2, figsize=(18, 9))
axes = axes.flatten()

# One panel per hemisphere: the 2-D triangulation edges with the points drawn on top.
for name, df, triang, ax in zip(['North', 'South'], df_hemispheres, tri_2d_list, axes):
    from_randoms = df['RAN']
    data_rand = df[from_randoms]
    data_real = df[~from_randoms]

    ax.triplot(triang, linewidth=0.5, color='black', alpha=0.3)

    # Randoms are drawn first so the (larger) data markers end up on top.
    for subset, size, colour, tag in (
        (data_rand, 0.1, 'blue', 'Random'),
        (data_real, 0.3, 'red', 'Data'),
    ):
        ax.scatter(subset['X'], subset['Y'], s=size, c=colour, label=tag)

    ax.set_title(f'{name} Region', fontsize=12)
    ax.set_xlabel('X [Mpc]')
    ax.set_ylabel('Y [Mpc]')
    ax.set_box_aspect(1)
    ax.legend(fontsize=8, loc='upper right')

plt.tight_layout()
plt.show()


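### Compute $r$

For every point, $r = (n_\mathrm{data} - n_\mathrm{rand}) / (n_\mathrm{data} + n_\mathrm{rand})$, where the counts run over the point's neighbours in the 3-D Delaunay triangulation.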

In [None]:
def compute_r(df):
    """Compute the neighbour-contrast statistic ``r`` for every point in ``df``.

    The points (columns ``X``, ``Y``, ``Z``) are Delaunay-triangulated in 3-D. For each
    point, the vertices it shares a simplex edge with are split into data neighbours
    (``RAN`` false) and random neighbours (``RAN`` true), and

        r = (n_data - n_rand) / (n_data + n_rand)

    so ``r`` lies in [-1, 1].

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``X``, ``Y``, ``Z`` and a boolean-like ``RAN`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added float column ``r``; ``df`` itself is not modified.

    Raises
    ------
    ValueError
        If some point has no neighbours in the triangulation (i.e. it is not a vertex
        of any simplex).
    """
    coords = df[['X', 'Y', 'Z']].values
    # Coerce to bool first: on a 0/1 integer column `~` would be a bitwise NOT
    # (giving -1/-2) and silently corrupt the neighbour counts.
    is_data = ~df['RAN'].values.astype(bool)

    tri = Delaunay(coords)

    # CSR-style adjacency from SciPy: the neighbours of vertex k are
    # indices[indptr[k]:indptr[k + 1]]. This replaces a Python loop over every simplex.
    indptr, indices = tri.vertex_neighbor_vertices
    n_total = np.diff(indptr)

    isolated = np.flatnonzero(n_total == 0)
    if isolated.size:
        raise ValueError(f'No neighbors for point {isolated[0]} in the triangulation.')

    # Prefix sums over the flattened neighbour list give, per vertex, the number of
    # data neighbours as a difference of two entries (exact integer arithmetic).
    data_prefix = np.concatenate(([0], np.cumsum(is_data[indices])))
    n_data = data_prefix[indptr[1:]] - data_prefix[indptr[:-1]]
    n_rand = n_total - n_data

    out = df.copy()
    out['r'] = (n_data - n_rand) / n_total
    return out

In [None]:
# Per-hemisphere frames with the r column attached, ordered [NORTH, SOUTH].
df_r = [compute_r(df_hemispheres[k]) for k in (0, 1)]

### CDF of $r$

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF

In [None]:
# Empirical CDF of r for data (solid) and randoms (dotted), one colour per hemisphere.
colors = ['red', 'blue']
labels = ['North', 'South']

fig, ax = plt.subplots(figsize=(10, 7))

for hemi, color, label in zip(df_r, colors, labels):
    from_randoms = hemi['RAN']
    for mask, linestyle, kind in ((~from_randoms, '-', 'Data'), (from_randoms, 'dotted', 'Random')):
        ecdf = ECDF(hemi.loc[mask, 'r'].values)
        # ECDF exposes the sorted sample as .x and the cumulative fraction as .y.
        ax.plot(ecdf.x, ecdf.y, color=color, linestyle=linestyle, label=f'{label} {kind}')

# r = (n_data - n_rand) / (n_data + n_rand) is a dimensionless ratio, so the axis
# carries no unit (the label previously claimed Mpc).
ax.set_xlabel(r'$r$')
ax.set_ylabel('CDF')
ax.set_title('QSO')
ax.grid(True)
ax.legend(ncol=2, fontsize='small', loc='upper left')
fig.tight_layout()
plt.show()


### Classify

In [None]:
def classify_r(df):
    """Return a copy of ``df`` with a ``TYPE`` label derived from the ``r`` column.

    Right-closed intervals are used:

        [-1.0, -0.9] -> 'void'
        (-0.9,  0.0] -> 'sheet'
        ( 0.0,  0.9] -> 'filament'
        ( 0.9,  1.0] -> 'knot'

    Anything else (outside [-1, 1], or NaN) is labelled 'error'.
    """
    r = df['r'].values
    names = np.array(['void', 'sheet', 'filament', 'knot'])

    # side='left' puts a value equal to an edge into the bin below it, which makes
    # every interval closed on the right.
    bin_index = np.searchsorted([-0.9, 0.0, 0.9], r, side='left')
    # NaN fails both comparisons, so it falls through to 'error' as well.
    in_range = (r >= -1.0) & (r <= 1.0)

    typed = df.copy()
    typed['TYPE'] = np.where(in_range, names[bin_index], 'error')
    return typed

In [None]:
# Per-hemisphere frames with the TYPE column attached, ordered [NORTH, SOUTH].
df_typed = [classify_r(df_r[k]) for k in (0, 1)]

In [None]:
structure_types = ['void', 'sheet', 'filament', 'knot']
hemisphere_labels = ['North', 'South']


def _format_share(count, total):
    """Format ``count / total`` as 'p% ± s%', with s the binomial standard error."""
    if count == 0:
        # Also covers an empty subset (total == 0), avoiding a division by zero.
        frac, std = 0.0, 0.0
    else:
        frac = count / total
        std = ((frac * (1 - frac)) / total) ** 0.5
    return f"{frac * 100:.2f}% ± {std * 100:.2f}%"


# One table per hemisphere: a "Data" row and a "Random" row, one column per structure type.
dfs_by_hemisphere = []

for hemi_name, df in zip(hemisphere_labels, df_typed):
    rows = {}
    for is_random, suffix in ((False, 'Data'), (True, 'Random')):
        df_sub = df[df['RAN'] == is_random]
        total = len(df_sub)
        rows[f"{hemi_name} {suffix}"] = [
            _format_share(np.sum(df_sub['TYPE'] == t), total) for t in structure_types
        ]

    hemi_df = pd.DataFrame(
        list(rows.values()),
        columns=['Voids', 'Sheets', 'Filaments', 'Knots'],
        index=list(rows),
    )
    dfs_by_hemisphere.append(hemi_df)

for hemi_name, hemi_df in zip(hemisphere_labels, dfs_by_hemisphere):
    print(f"\n{hemi_name}")
    display(hemi_df)


### Plot

In [None]:
# Marker colour used for each structure class in the scatter panels.
type_colors = dict(void='black', sheet='blue', filament='red', knot='green')

# For data

In [None]:
# Axis windows per hemisphere, in the same [north, south] order as df_typed.
xlims = [(-4500, -2500), (3500, 5500)]
ylims = [(-200, 200), (-200, 200)]

# One figure per hemisphere, one panel per structure type, data points (RAN false) only.
for h_idx, (label, df) in enumerate(zip(hemisphere_labels, df_typed)):
    xy = df[['X', 'Y']].values
    observed = ~df['RAN'].values

    fig, axes = plt.subplots(1, 4, figsize=(20, 5))

    for ax, tp in zip(axes.flatten(), structure_types):
        selected = (df['TYPE'] == tp).values & observed

        ax.scatter(
            xy[selected, 0], xy[selected, 1],
            s=1, facecolors=type_colors[tp], edgecolors='black', linewidths=0.3, alpha=0.4,
        )
        ax.set_title(tp.capitalize(), fontsize=12)
        ax.set_xlabel('X [Mpc]')
        ax.set_ylabel('Y [Mpc]')
        ax.set_xlim(xlims[h_idx])
        ax.set_ylim(ylims[h_idx])
        ax.set_box_aspect(1)

    plt.suptitle(f'QSO - {label} Hemisphere (Data)', fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()


# For random

In [None]:
# Axis windows per hemisphere, in the same [north, south] order as df_typed.
xlims = [(-4500, -2500), (3500, 5500)]
ylims = [(-200, 200), (-200, 200)]

# One figure per hemisphere, one panel per structure type, random points only.
for h_idx, (label, df) in enumerate(zip(hemisphere_labels, df_typed)):
    xy = df[['X', 'Y']].values
    is_real = ~df['RAN'].values

    fig, axes = plt.subplots(1, 4, figsize=(20, 5))

    for ax, tp in zip(axes.flatten(), structure_types):
        # Complement of the data mask: keep only points flagged as randoms.
        selected = (df['TYPE'] == tp).values & ~is_real

        ax.scatter(
            xy[selected, 0], xy[selected, 1],
            s=1, facecolors=type_colors[tp], edgecolors='black', linewidths=0.3, alpha=0.4,
        )
        ax.set_title(tp.capitalize(), fontsize=12)
        ax.set_xlabel('X [Mpc]')
        ax.set_ylabel('Y [Mpc]')
        ax.set_xlim(xlims[h_idx])
        ax.set_ylim(ylims[h_idx])
        ax.set_box_aspect(1)

    plt.suptitle(f'QSO - {label} Hemisphere (Random)', fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()