## 0. Packages

In [None]:
# Numerical and Scientific Computing
import numpy as np
import pandas as pd
import scipy as sp
import math
from numba import jit
from joblib import Parallel, delayed



from scipy import special
from scipy.integrate import quad
from scipy import integrate
from scipy import stats
from scipy.interpolate import interp1d

# Plotting and Data Visualization
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from matplotlib import gridspec
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)
import matplotlib as mpl
from matplotlib import ticker, cm
from matplotlib.colors import LinearSegmentedColormap
from tabulate import tabulate

import pyregion

# File Handling
import os
import glob
import csv

# Astronomical

from astropy.cosmology import FlatLambdaCDM
from astropy.cosmology import Planck15
import astropy.units as u
from astropy.io import fits
from astropy.table import Table
from astroML.stats import binned_statistic_2d


# Other
from tqdm.notebook import tqdm


## 1. Functions

In [None]:
# Global constants shared by the functions below.
# c0 divides a (100 * length) term in slice_width(); presumably the speed of
# light in km/s -- TODO confirm units.
c0 = 3e5
# Hubble constant handed to FlatLambdaCDM as H0.
H_0 = 70
# Density parameters used in the sqrt(Omega_m * (1+z)^3 + Omega_l) term of slice_width().
Omega_l = 0.7
Omega_m = 0.3

# NOTE(review): lim_deltaz is not referenced in the code visible here --
# presumably a redshift-uncertainty cut applied elsewhere; confirm.
lim_deltaz = 2

# astropy cosmology object built from the constants above.
cosmo = FlatLambdaCDM(H0 = H_0, Om0 = Omega_m)

###########################################################################
############################ LSS functions ################################
###########################################################################

def M_lim(z):
    """Mass completeness limit fitting function (Weaver et al. 2022).

    Parameters
    ----------
    z : float or numpy.array
        Redshift(s).

    Returns
    -------
    float or numpy.array
        log10 of the quadratic-in-(1+z) fit evaluated at ``z``.
    """
    one_plus_z = 1 + z
    linear_term = -1.51e6 * one_plus_z
    quadratic_term = 6.8e7 * one_plus_z**2
    return np.log10(linear_term + quadratic_term)

def M_lim_ks(z):
    """Mass completeness limit fitting function on K_s (Weaver et al. 2022).

    Parameters
    ----------
    z : float or numpy.array
        Redshift(s).

    Returns
    -------
    float or numpy.array
        log10 of the quadratic-in-(1+z) fit evaluated at ``z``. The fitted
        polynomial is negative near z = 0, where the result is NaN.
    """
    scale = 1 + z
    fitted_mass = scale * -3.55e8 + scale**2 * 2.7e8
    return np.log10(fitted_mass)

def slice_width(z):
    """Redshift thickness of a slice whose line-of-sight length is ``physical_width``.

    Reads the module-level ``physical_width`` (Mpc h^-1), ``c0``, ``Omega_m``
    and ``Omega_l``.

    Parameters
    ----------
    z : float or numpy.array
        Redshift(s) at which the width is evaluated.

    Returns
    -------
    float or numpy.array
        Slice width in redshift units.
    """
    # Dimensionless expansion factor for a flat matter + Lambda universe.
    expansion = np.sqrt(Omega_m * (1 + z)**3 + Omega_l)
    return physical_width * 100 / c0 * expansion


def redshift_bins(zmin, zmax):
    """Build redshift slices between ``zmin`` and ``zmax``.

    Each slice has the width given by ``slice_width`` evaluated at its own
    centre (i.e. a fixed length in Mpc h^-1).

    Parameters
    ----------
    zmin, zmax : float
        Lower and upper redshift of the range to be sliced.

    Returns
    -------
    centers : numpy.array
        Slice centres, shape (N,).
    edges : numpy.array
        Lower and upper edge of every slice, shape (N, 2).
    """
    # The first slice starts at zmin, so its centre lies half a width above it.
    slice_mids = [zmin + 0.5 * slice_width(zmin)]

    # Step forward by the width at the latest centre; stop as soon as the
    # next centre would no longer be strictly below zmax.
    while True:
        candidate = slice_mids[-1] + slice_width(slice_mids[-1])
        if not candidate < zmax:
            break
        slice_mids.append(candidate)

    centers = np.array(slice_mids)

    # Edges sit half a local slice width either side of each centre.
    half_widths = np.array([slice_width(mid) / 2 for mid in centers])
    edges = np.column_stack((centers - half_widths, centers + half_widths))

    return (centers, edges)


def cartesian_from_polar(phi, theta):
    """Convert spherical angles to Cartesian unit vectors.

    Parameters
    ----------
    phi, theta : float or numpy.array
        Azimuthal and polar angle in radians.

    Returns
    -------
    nhat : numpy.array
        Unit vector(s) in direction (phi, theta); the first axis holds the
        x, y, z components.
    """
    sin_theta = np.sin(theta)
    components = [
        sin_theta * np.cos(phi),
        sin_theta * np.sin(phi),
        np.cos(theta),
    ]
    return np.array(components)

def cos_dist(alpha, delta, alpha0, delta0):
    """Cosine of the angular separation between sky positions.

    All angles are given in degrees.

    Parameters
    ----------
    alpha, delta : float or numpy.array
        Longitude-like and latitude-like angle of the point(s).
    alpha0, delta0 : float or numpy.array
        Longitude-like and latitude-like angle of the reference point(s).

    Returns
    -------
    float or numpy.array
        Dot product of the corresponding unit vectors, limited to [-1, 1].
    """
    # Latitude-like angles become polar angles measured from the pole.
    phi = alpha * np.pi / 180
    theta = np.pi / 2 - delta * np.pi / 180
    phi0 = alpha0 * np.pi / 180
    theta0 = np.pi / 2 - delta0 * np.pi / 180

    x = cartesian_from_polar(phi, theta)
    x0 = cartesian_from_polar(phi0, theta0)
    # Contract over the x/y/z component axis of both vector stacks.
    cosdist = np.tensordot(x, x0, axes=[[0], [0]])
    # Only guard against round-off outside the valid cosine range; a lower
    # bound of 0 would report every separation beyond 90 degrees as 90.
    return np.clip(cosdist, -1, 1)

def logsinh(x):
    """Evaluate log(sinh(x)) without forming sinh(x), which overflows for large x.

    Uses log(sinh(x)) = x + log(1 - exp(-2x)) - log(2).

    Parameters
    ----------
    x : float or numpy.array
        Non-negative argument(s).

    Returns
    -------
    float or numpy.array
        log(sinh(x)).

    Raises
    ------
    ValueError
        If any element of ``x`` is negative.
    """
    has_negative = np.any(x < 0)
    if has_negative:
        raise ValueError("logsinh only valid for positive arguments")
    decay = np.exp(-2*x)
    return x + np.log(1 - decay) - np.log(2)

def Log_K(alpha, delta, alpha0, delta0, kappa):
    """Logarithm of a von Mises-Fisher kernel on the sphere.

    Parameters
    ----------
    alpha, delta : float or numpy.array
        Position(s) at which the kernel is evaluated, in degrees.
    alpha0, delta0 : float or numpy.array
        Kernel centre, in degrees.
    kappa : float
        Concentration parameter (must be non-negative, see ``logsinh``).

    Returns
    -------
    float or numpy.array
        log(kappa / (4 pi sinh(kappa))) + kappa * cos(separation).
    """
    log_normalisation = -np.log(4 * np.pi / kappa) - logsinh(kappa)
    cos_separation = cos_dist(alpha, delta, alpha0, delta0)
    return log_normalisation + cos_separation * kappa

def σ_k(X0, b, points):
    """Leave-one-out weighted kernel sum evaluated at point ``X0``.

    Parameters
    ----------
    X0 : int
        Row index of the point that is left out and used as kernel centre.
    b : float
        Kernel bandwidth in degrees.
    points : numpy.array
        Array whose columns 0 and 1 are the coordinates (degrees) and whose
        column 2 is the weight of each point.

    Returns
    -------
    float
        Sum over all other points of weight * kernel value.
    """
    # Bandwidth in radians -> concentration of the spherical kernel.
    kappa = 1 / (b * np.pi / 180)**2
    centre_x = points[X0, 0]
    centre_y = points[X0, 1]

    # Select every row except X0 with a boolean mask.
    keep = np.ones(len(points), dtype=bool)
    keep[X0] = False
    others = points[keep]

    log_kernel = Log_K(others[:, 0], others[:, 1], centre_x, centre_y, kappa)
    contributions = others[:, 2] * np.exp(log_kernel)
    return np.sum(contributions)

def LCV(b, points):
    """Likelihood cross-validation score for bandwidth ``b``.

    Parameters
    ----------
    b : float
        Kernel bandwidth in degrees.
    points : numpy.array
        Point array in the layout expected by ``σ_k``.

    Returns
    -------
    float
        Mean over all points of the log of their leave-one-out kernel sum.
    """
    n_points = len(points)
    log_density = np.empty(n_points)
    for idx in range(n_points):
        log_density[idx] = np.log(σ_k(idx, b, points))
    return np.mean(log_density)


def σ_k_gaussian(X0, b, points):
    """Leave-one-out weighted kernel sum at point ``X0`` using a Gaussian kernel.

    Parameters
    ----------
    X0 : int
        Row index of the point that is left out and used as kernel centre.
    b : float
        Standard deviation of the Gaussian kernel in degrees.
    points : numpy.array
        Array whose columns 0 and 1 are the coordinates (degrees) and whose
        column 2 is the weight of each point.

    Returns
    -------
    float
        Sum over all other points of weight * Gaussian pdf of the angular
        separation (radians).
    """
    X0_x = points[X0, 0]
    X0_y = points[X0, 1]
    rem = np.delete(points, X0, axis=0)

    # Angular separation (radians) of every remaining point from X0.
    separations = np.arccos(cos_dist(rem[:, 0], rem[:, 1], X0_x, X0_y))
    # `norm` was never imported in this file; use it through scipy.stats.
    kernel = stats.norm.pdf(separations, loc=0, scale=b * np.pi / 180)
    return np.sum(rem[:, 2] * kernel)

def σ(alpha, delta, b_i, points):
    """Weighted kernel sum at an arbitrary position.

    Unlike ``σ_k`` no point is left out of the sum.

    Parameters
    ----------
    alpha, delta : float
        Position at which the sum is evaluated, in degrees.
    b_i : float
        Kernel bandwidth in degrees.
    points : numpy.array
        Array whose columns 0 and 1 are the coordinates (degrees) and whose
        column 2 is the weight of each point.

    Returns
    -------
    float
        Sum over all points of weight * kernel value.
    """
    kappa = 1 / (b_i * np.pi / 180)**2
    log_kernel = Log_K(points[:, 0], points[:, 1], alpha, delta, kappa)
    weighted_kernel = points[:, 2] * np.exp(log_kernel)
    return np.sum(weighted_kernel)

def Adaptive_b(b, points):
    """Per-point adaptive bandwidths derived from a global bandwidth ``b``.

    A pilot estimate ``points[i, 4] * σ(...)`` is evaluated at every point
    with the global bandwidth; each local bandwidth is ``b`` scaled by
    (pilot / geometric mean of the pilots) ** -0.5.

    Parameters
    ----------
    b : float
        Global kernel bandwidth in degrees.
    points : numpy.array
        Array whose columns 0 and 1 are the coordinates (degrees), column 2
        the weights, and column 4 a per-point factor multiplying the pilot
        estimate (its meaning is not visible here -- confirm with the caller).

    Returns
    -------
    numpy.array
        Adaptive bandwidth for every point; empty for empty input.
    """
    n_points = len(points)
    if n_points == 0:
        return np.array([])

    # Each pilot estimate is itself a sum over all points, so evaluate it
    # once per point and reuse it for both the mean and the scaling.
    pilot = np.array([
        points[i, 4] * σ(points[i, 0], points[i, 1], b, points)
        for i in tqdm(range(n_points))
    ])

    # Log of the geometric mean of the pilot estimates.
    log_g = 1 / n_points * np.sum(np.log(pilot))
    b_i = b * (pilot / np.exp(log_g)) ** -0.5
    return b_i

def divider_NUV(rj):
    """Evaluate the linear divider 3 * rj + 1.

    Parameters
    ----------
    rj : float or numpy.array
        Abscissa value(s); presumably an r-J colour -- TODO confirm.

    Returns
    -------
    float or numpy.array
        Value of the dividing line at ``rj``.
    """
    slope, intercept = 3, 1
    return slope * rj + intercept

In [None]:
def setup(work_path='.'):
    '''
    Create the working directory tree below ``work_path`` and return its paths.

    Directories that already exist are left untouched; every newly created
    one is reported on stdout with its absolute path.

    Returns the tuple
    (outputs_dir, plots_dir, inputs_dir, weight_dir, density_dir).
    '''
    def under(*parts):
        return os.path.join(work_path, *parts)

    required = ('inputs', 'outputs', 'bin',
                'outputs/plots', 'outputs/weights', 'outputs/density')
    for relative in required:
        target = under(relative)
        if os.path.exists(target):
            continue
        # makedirs also creates any missing parent directories.
        os.makedirs(target)
        print(f'Built directory: {os.path.abspath(target)}')

    return (
        under('outputs'),
        under('outputs', 'plots'),
        under('inputs'),
        under('outputs', 'weights'),
        under('outputs', 'density'),
    )

# Placeholder: replace with the directory that should hold the catalog
# working tree. As written, setup() would create a directory literally named
# after this string, relative to the current working directory.
cat_dir = "where you want to set up the catalog directories"

# Create the directory tree (if missing) and keep the paths as globals.
outputs_dir, plots_dir, inputs_dir, weights_dir, density_dir = setup(work_path=cat_dir)

## 2. Preparing Data

In [None]:
# Redshift range that is divided into slices.
z_min, z_max = 0.4, 9.5

# Line-of-sight length of each slice; read as a global by slice_width().
physical_width = 35 # h^-1 Mpc

# Slice centres (N,) and their lower/upper redshift edges (N, 2).
slice_centers, z_edges = redshift_bins(z_min, z_max)

# Width of every slice in redshift units (upper edge minus lower edge).
z_width = z_edges[:, 1] - z_edges[:, 0]

In [None]:
# Placeholder for the input catalog.
# NOTE(review): later code indexes Data by column name (e.g. Data['zPDF']) and
# by boolean mask, so this must be replaced by a loaded table-like object;
# a plain path string will fail there.
Data = "path to your data file"

# Only used here to build the file names below; presumably the weight
# threshold applied when the arrays were produced -- TODO confirm.
threshold = 0.05

# Load the .npy files
# The file names are keyed by `threshold` and `physical_width`, so they must
# match the values used when the arrays were written.
# NOTE(review): none of the arrays loaded here is used in the code visible in
# this section.
weights = np.load(os.path.join(weights_dir, f'weights_unthresholded_normalized_thresh{threshold}_lengh{physical_width}.npy'))
weights_block = np.load(os.path.join(weights_dir, f'weightsBlock_unthresholded_normalized_thresh{threshold}_lengh{physical_width}.npy'))
W = np.load(os.path.join(weights_dir, f'weightsBlock_thresh{threshold}_normalized_lengh{physical_width}.npy'))
normalized_delta_z_median = np.load(os.path.join(weights_dir, f'normalized_delta_z_median_thresh{threshold}_lengh{physical_width}.npy'))
delta_z_median = np.load(os.path.join(weights_dir, f'delta_z_median_thresh{threshold}_lengh{physical_width}.npy'))
count_in_zslice = np.load(os.path.join(weights_dir, f'count_in_zslice_thresh{threshold}_lengh{physical_width}.npy'))

## 3. b Calculation

In [None]:
from scipy.optimize import minimize_scalar

# Best global kernel bandwidth (degrees) for every redshift slice. NaN marks
# slices that cannot be optimised; they are back-filled after the loop.
best_bandwidths = np.full(len(slice_centers), np.nan)

# Rotation angle for the field, taken from the slope between two reference
# positions (presumably RA/Dec in degrees of two field corners -- confirm).
# It does not depend on the slice, so it is computed once.
tan = (1.7259 - 1.9644) / (149.66 - 150.31)
theta = -np.arctan(tan)

# Log-spaced bandwidth grid.
# NOTE(review): the continuous optimiser below does not use this grid (and its
# lower bound differs: 0.005 here vs 0.001 there); it is kept in case other
# cells reference it.
b_grid = np.logspace(np.log10(0.005), np.log10(0.3), 100)

n_slices = len(slice_centers)
for i in range(n_slices):
    print('Progress:', i / len(slice_centers) * 100, '%')

    zmin1, zmax1 = z_edges[i]

    # Objects whose redshift estimate falls strictly inside this slice.
    sel = (Data['zPDF'] > zmin1) & (Data['zPDF'] < zmax1)
    sub_Data = Data[sel]

    # Weight of each object: probability that a Gaussian centred on zPDF,
    # with sigma equal to half the 68% interval, lies inside the slice.
    delta_z_1 = sub_Data['zPDF_u68'] - sub_Data['zPDF_l68']
    sigma_1 = delta_z_1 / 2
    mu_1 = sub_Data['zPDF']
    Gauss_coeff = 1 / (np.sqrt(2) * sigma_1)

    weights1 = 0.5 * (special.erf(Gauss_coeff * (mu_1 - zmin1)) - special.erf(Gauss_coeff * (mu_1 - zmax1)))

    # Rotate the detection coordinates by theta.
    x = np.cos(theta) * sub_Data['ra_detec'] + np.sin(theta) * sub_Data['dec_detec']
    y = -np.sin(theta) * sub_Data['ra_detec'] + np.cos(theta) * sub_Data['dec_detec']

    # Columns: rotated x, rotated y, weight -- the layout LCV / σ_k expect.
    pts = np.column_stack((x, y, weights1))

    if len(pts) == 0:
        print('No points in this slice')
        continue

    print('Number of points in this slice:', len(pts))

    if len(pts) < 2:
        # With a single point the leave-one-out sum is empty, so its log is
        # -inf for every bandwidth and the optimum would be meaningless.
        print('Too few points for cross-validation in this slice')
        continue

    # Define function to maximize (minimize_scalar minimises, hence the sign).
    def neg_LCV(b):
        return -LCV(b, pts)

    # Continuous optimization
    res = minimize_scalar(
        neg_LCV,
        bounds=(0.001, 0.3),
        method='bounded'
    )
    if not res.success:
        print('Warning: bandwidth optimization did not converge:', res.message)

    b = res.x
    print('z_min:', zmin1, 'z_max:', zmax1)
    print('Best global bandwidth:', b)

    best_bandwidths[i] = b
    print('_______________________________________')

# Back-fill slices without an optimum using the mean of the valid ones.
if np.all(np.isnan(best_bandwidths)):
    print('Warning: no slice produced a valid bandwidth; saving NaNs')
else:
    best_bandwidths = np.nan_to_num(best_bandwidths, nan=np.nanmean(best_bandwidths))
np.save(os.path.join(weights_dir, f'best_bandwidths_thresh{threshold}_lengh{physical_width}.npy'), best_bandwidths)