# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import healpy as hp
import os
from joblib import Parallel, delayed, parallel_backend
from tqdm import tqdm
from statistics import mean, stdev

# Run parameters
nside = 64     # HEALPix resolution handed to healpy (ang2pix / nside2npix)
nf = 100       # number of bootstrap random realizations
n_jobs = 15    # number of parallel jobs

# Working directories: randomized catalogues and intermediate results.
# Both are created up front so later writes cannot fail on a missing folder.
output_dir = "Randomized_data"
file_inter = "inter_data2"
for _work_dir in (output_dir, file_inter):
    os.makedirs(_work_dir, exist_ok=True)

# -----------------------------
# Function to generate one randomized file
# -----------------------------
def randomize_sample(f, r, id_pix):
    """Generate one randomized catalogue and write it to ``output_dir``.

    Directions are drawn by rejection sampling: ``cos(theta)`` is uniform in
    [-1, 1] and ``phi`` is uniform in [0, 2*pi]. A direction is kept only
    when the pixel returned by ``hp.ang2pix`` (at the module-level ``nside``)
    is listed in ``id_pix``. One direction is kept for every entry of ``r``;
    the columns ``r``, ``th``, ``ph`` are then written tab-separated, without
    header or index, to ``s2_rand{f+1}.dat`` inside ``output_dir``.

    Parameters
    ----------
    f : int
        Zero-based realization index. ``f + 1`` seeds the random generator
        and appears in the output file name.
    r : sequence of float
        Radial values, copied unchanged into the first output column.
    id_pix : array-like of int
        Pixel indices in which random directions are accepted.

    Returns
    -------
    str
        ``"Saved: <output path>"``.

    Raises
    ------
    ValueError
        If ``r`` is non-empty but ``id_pix`` contains no pixel, because the
        rejection loop could then never finish.
    """
    # A private generator seeded with f + 1 produces the same stream as
    # random.seed(f + 1) but leaves the global `random` state untouched.
    rng = random.Random(f + 1)

    num_points = len(r)

    # Hash-based membership: `pix in ndarray` would scan the whole array for
    # every candidate direction.
    if isinstance(id_pix, (set, frozenset)):
        allowed_pix = id_pix
    else:
        allowed_pix = frozenset(np.ravel(id_pix).tolist())

    if num_points > 0 and not allowed_pix:
        raise ValueError(
            "id_pix is empty: no random direction can ever be accepted")

    valid_theta = []
    valid_phi = []
    while len(valid_theta) < num_points:
        # Draw order (cos_theta first, then phi) is kept so that a given seed
        # reproduces the same catalogue.
        cos_theta = rng.uniform(-1.0, 1.0)
        phi = rng.uniform(0, 2 * np.pi)
        theta = np.arccos(cos_theta)

        if int(hp.ang2pix(nside, theta, phi)) in allowed_pix:
            valid_theta.append(theta)
            valid_phi.append(phi)

    # Save randomized data
    df_out = pd.DataFrame({
        'r': r,
        'th': np.array(valid_theta),
        'ph': np.array(valid_phi),
    })
    out_file = os.path.join(output_dir, f's2_rand{f+1}.dat')
    df_out.to_csv(out_file, sep="\t", header=None, index=False)

    return f"Saved: {out_file}"

# -----------------------------
# Inputs for sample 2
# -----------------------------
# Original sample: tab-separated, no header, columns r / th / ph.
# Only the radial column is handed on to the randomization.
sample_file = '../Data_exc_lqg_s2/m3_sample2_ex_LQG.dat'
df_sample = pd.read_csv(sample_file, header=None, names=['r', 'th', 'ph'], sep="\t")
r = df_sample['r'].to_numpy()

# Single-column list of the pixel IDs in which random points are accepted
pix_file = '../Data_exc_lqg_s2/m3_non_zero_pix_id_s2_ex_LQG.dat'
df_pix = pd.read_csv(pix_file, header=None, names=['id_list'], sep="\t")
id_pix = df_pix['id_list'].to_numpy()

# -----------------------------
# Generate the nf randomized catalogues in parallel (joblib, loky backend)
# -----------------------------

with parallel_backend("loky", n_jobs=n_jobs):
    # One delayed call per realization index; `results` collects the
    # "Saved: ..." messages returned by randomize_sample.
    randomize_jobs = (delayed(randomize_sample)(f, r, id_pix) for f in range(nf))
    results = list(Parallel()(randomize_jobs))
############################################################################################################


# -----------------------------------------------------------
# Compute Rényi entropies for q = 1 to 5
# -----------------------------------------------------------
def compute_renyi_entropy(input_file, output_file, rmin, rmax, Nside=nside, nbin=30):
    """Tabulate Rényi entropies of orders q = 1..5 in cumulative radial shells.

    The input catalogue (tab-separated, no header, columns ``r``, ``th``,
    ``ph``) is binned into ``nbin`` nested shells: shell ``j`` holds every
    point with ``rmin <= r <= rmin + (j + 1) * dr`` where
    ``dr = (rmax - rmin) / nbin``. In each shell the points are counted per
    pixel (``hp.ang2pix`` at ``Nside``) and normalised to probabilities
    ``p``. The entropies use base-10 logarithms:

    * q = 1:  ``H = -sum(p * log10(p))`` over occupied pixels,
    * q > 1:  ``H = log10(sum(p ** q)) / (1 - q)``.

    Shells without any point keep ``H = 0`` for every order.

    The result is written to ``output_file`` as a tab-separated table with
    no header or index: first column the outer shell radius, then the five
    entropies for q = 1..5.

    Sums are evaluated with NumPy reductions, so values may differ from a
    term-by-term Python accumulation in the last floating-point digits.

    Parameters
    ----------
    input_file : str
        Path of the catalogue to analyse.
    output_file : str
        Path of the table to write.
    rmin, rmax : float
        Radial range covered by the shells.
    Nside : int, optional
        Pixelisation parameter passed to healpy. The default is the value of
        the module-level ``nside`` at definition time.
    nbin : int, optional
        Number of cumulative shells.

    Raises
    ------
    ValueError
        If ``nbin`` is smaller than 1.
    """
    if nbin < 1:
        raise ValueError("nbin must be a positive integer")

    n_orders = 5
    Npix = hp.nside2npix(Nside)

    df = pd.read_csv(input_file, sep="\t", header=None)
    df.columns = ['r', 'th', 'ph']
    r1 = df['r'].to_numpy()
    th1 = df['th'].to_numpy()
    ph1 = df['ph'].to_numpy()

    dr = (rmax - rmin) / nbin
    # Outer radius of every cumulative shell; also the first output column.
    R = rmin + (np.arange(nbin) + 1) * dr

    # rmin + nbin * dr can round to just below rmax, which would silently
    # drop the outermost point from the last shell. Compare against an edge
    # that is never smaller than rmax; the reported radii stay untouched.
    upper = R.copy()
    upper[-1] = max(upper[-1], rmax)

    # Only points inside the overall range ever contribute, so only those
    # are converted to pixel indices, in one vectorized call.
    inside = (r1 >= rmin) & (r1 <= upper[-1])
    r_in = r1[inside]
    pix = hp.ang2pix(Nside, th1[inside], ph1[inside])

    # Per-shell pixel counts and number of points in each shell.
    m = np.zeros((nbin, Npix), dtype=float)
    Neff = np.zeros(nbin, dtype=int)
    for j in range(nbin):
        in_shell = r_in <= upper[j]
        m[j] = np.bincount(pix[in_shell], minlength=Npix)
        Neff[j] = np.count_nonzero(in_shell)

    # Pixel probabilities; empty shells are left at zero.
    p = np.zeros((nbin, Npix), dtype=float)
    filled = Neff > 0
    p[filled] = m[filled] / Neff[filled, None]

    H = np.zeros((nbin, n_orders), dtype=float)
    for k in range(n_orders):
        q = k + 1
        if q == 1:
            # Empty pixels contribute nothing (0 * log 0 is taken as 0).
            occupied = p > 0
            plogp = np.zeros_like(p)
            plogp[occupied] = p[occupied] * np.log10(p[occupied])
            # "0.0 - x" rather than "-x" so an all-zero sum stays +0.0.
            H[:, k] = 0.0 - plogp.sum(axis=1)
        else:
            h = (p ** q).sum(axis=1)
            positive = h > 0
            H[positive, k] = np.log10(h[positive]) / (1 - q)

    table = pd.DataFrame(np.column_stack([R, H]))
    table.to_csv(output_file, sep='\t', header=False, index=False)


# -----------------------------------------------------------
# Entropy tables for every randomized realization of sample 2
# -----------------------------------------------------------

# The radial range is taken from the original sample so that all
# realizations share the same shells.
f_in = '../Data_exc_lqg_s2/m3_sample2_ex_LQG.dat'
df = pd.read_csv(f_in, sep="\t", header=None)
df.columns = ['r', 'th', 'ph']
R_max = df['r'].max()
R_min = df['r'].min()

# One (input file, output file, rmin, rmax) tuple per realization
tasks = [
    (
        f'Randomized_data/s2_rand{idx}.dat',
        f'inter_data2/anis_s2_rand{idx}.dat',
        R_min,
        R_max,
    )
    for idx in range(1, nf + 1)
]

# Fan the tasks out over the loky workers
with parallel_backend("loky", n_jobs=n_jobs):
    entropy_jobs = (delayed(compute_renyi_entropy)(*task) for task in tasks)
    results = list(Parallel()(entropy_jobs))

################################################################################################################


from statistics import mean, stdev

# Must equal the number of rows in each anis_s2_rand*.dat table
# (compute_renyi_entropy is called with its default nbin=30).
nbin = 30
criteria = np.zeros((nf, nbin))   # one row of per-shell criteria per realization
mean_crit = np.zeros(nbin)
d_crit = np.zeros(nbin)

for f in range(nf):
    file = 'inter_data2/anis_s2' + '_rand' + str(f+1) + '.dat'
    # Parse the table once; column 0 is the shell radius and columns 1-5
    # are the entropies for q = 1..5.
    table = np.loadtxt(file)
    RR = table[:, 0]
    a1, a2, a3, a4, a5 = (table[:, col] for col in range(1, 6))

    # Mean and population standard deviation (divide by 5) of the five
    # entropies in each shell, evaluated element-wise over all shells.
    a_mean = (a1 + a2 + a3 + a4 + a5) / 5.0
    stab_cri = np.sqrt((1 / 5) * ((a1 - a_mean) ** 2 + (a2 - a_mean) ** 2 +
                                  (a3 - a_mean) ** 2 + (a4 - a_mean) ** 2 +
                                  (a5 - a_mean) ** 2))

    # Relative spread of the entropies across q
    criteria[f] = stab_cri / a_mean

# Mean and sample standard deviation (statistics.stdev, n - 1 denominator)
# of the criterion over the nf realizations, shell by shell.
for i in range(nbin):
    list_crit = list(criteria[:, i])
    mean_crit[i] = mean(list_crit)
    d_crit[i] = stdev(list_crit)

# RR holds the radii read from the last realization's table
dict1 = {'r': RR, 'crit': mean_crit, 'sd': d_crit}
fd = pd.DataFrame(dict1)
f_name1 = 'inter_data2/sample_rand_2' + '_criteria.csv'
fd.to_csv(f_name1, index=False)

########################################################################################################################

import shutil

# Remove the per-realization entropy tables written to inter_data2
for idx in range(1, nf + 1):
    os.remove(f'inter_data2/anis_s2_rand{idx}.dat')

# Remove the randomized catalogues together with their directory
shutil.rmtree('Randomized_data')


