# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import healpy as hp
from statistics import mean, stdev
import os
from joblib import Parallel, delayed
from tqdm import tqdm

# --- Run parameters ---------------------------------------------------------
nside = 8      # HEALPix resolution parameter handed to healpy
nf = 100       # number of bootstrap samples
no_samp = 3    # number of quasar samples
n_jobs = 15    # number of parallel processes to use

# --- Working directories (created up front when missing) --------------------
output_dir = "Randomized_data"
file_inter = "inter_data2"
os.makedirs(output_dir, exist_ok=True)
os.makedirs(file_inter, exist_ok=True)

# -----------------------------
# Helper function for each sample randomization
# -----------------------------
def randomize_sample(k, f, r, id_pix):
    """Write one randomized copy of sample ``k`` to ``Randomized_data/``.

    The radial values ``r`` are copied unchanged; only the angular positions
    are redrawn. Trial directions are drawn uniformly in ``cos(theta)`` and in
    ``phi`` and accepted only when their HEALPix pixel (at the module-level
    ``nside``) is listed in ``id_pix``. Sampling continues until there is one
    accepted direction per entry of ``r``.

    The result is written as a headerless, tab-separated file with the columns
    ``r``, ``th``, ``ph`` at ``Randomized_data/s<k+1>_rand<f+1>.dat``.

    Parameters
    ----------
    k : int
        Zero-based sample index; only used to build the output file name.
    f : int
        Zero-based realization index. ``f + 1`` seeds the ``random`` module
        and is part of the output file name.
    r : sequence of float
        Radial values, written unchanged to the first output column.
    id_pix : iterable of int
        HEALPix pixel indices inside which random points are accepted.

    Raises
    ------
    ValueError
        If points are requested but ``id_pix`` contains no pixel index that
        exists at ``nside`` (the rejection loop could never finish).

    Notes
    -----
    NOTE(review): the seed depends on ``f`` only, so for a given ``f`` every
    sample ``k`` consumes the same stream of trial directions before masking.
    Kept as is so existing outputs stay reproducible.
    """
    random.seed(f + 1)
    num_points = len(r)

    # Build the lookup once: set membership is O(1), whereas `pix in ndarray`
    # scans the whole array on every trial of the rejection loop.
    allowed_pix = set(np.asarray(id_pix).ravel().tolist())

    # Without at least one reachable pixel no trial can ever be accepted.
    npix = hp.nside2npix(nside)
    if num_points > 0 and not any(0 <= p < npix for p in allowed_pix):
        raise ValueError(
            "id_pix contains no valid pixel index for nside=%d; "
            "cannot place %d random points" % (nside, num_points)
        )

    valid_theta = []
    valid_phi = []

    while len(valid_theta) < num_points:
        # Draw order (cos_theta first, then phi) is part of the reproducible
        # random stream and must not be swapped.
        cos_theta = random.uniform(-1.0, 1.0)
        phi = random.uniform(0, 2 * np.pi)
        theta = np.arccos(cos_theta)

        if int(hp.ang2pix(nside, theta, phi)) in allowed_pix:
            valid_theta.append(theta)
            valid_phi.append(phi)

    fd = pd.DataFrame({
        'r': r,
        'th': np.array(valid_theta),
        'ph': np.array(valid_phi),
    })

    out = 'Randomized_data/s' + str(k+1) + '_rand' + str(f+1) + '.dat'
    fd.to_csv(out, sep="\t", header=None, index=False)

# -----------------------------
# Main loop over all samples
# -----------------------------
for k in range(no_samp):
    # Masked catalogue of sample k+1: headerless, tab-separated r / th / ph.
    f_in = f'../Data_prep_masking/mask1_err/masked_sample_{k+1}.dat'
    df1 = pd.read_csv(f_in, sep="\t", header=None)
    df1.columns = ['r', 'th', 'ph']
    r, th, ph = (df1[col].to_numpy() for col in ('r', 'th', 'ph'))

    # Pixel indices in which randomized points may be placed for this sample.
    f1 = f'../Data_prep_masking/mask1_err/non_zero_pix_id_{k+1}.dat'
    df4 = pd.read_csv(f1, sep="\t", header=None)
    df4.columns = ['id_list']
    id_pix = df4['id_list'].to_numpy()

    # One worker task per bootstrap realization of this sample.
    Parallel(n_jobs=n_jobs, backend='loky')(
        delayed(randomize_sample)(k, f, r, id_pix) for f in range(nf)
    )
###############################################################################################################################################

# Compute Rényi entropies for q = 1 to 5
def compute_renyi_entropy(input_file, output_file, rmin, rmax, Nside=nside, nbin=30):
    """Tabulate Rényi entropies (orders q = 1..5) in cumulative radial bins.

    ``input_file`` is read as a headerless, tab-separated table with the
    columns ``r``, ``th``, ``ph``. The range ``[rmin, rmax]`` is split into
    ``nbin`` equal steps; bin ``j`` collects every object with
    ``rmin <= r <= rmin + (j + 1) * dr``, i.e. the bins are cumulative. Within
    each bin the objects are counted per HEALPix pixel (``th`` and ``ph`` are
    passed to ``hp.ang2pix`` as theta and phi), the counts are normalised to
    probabilities, and the base-10 entropies are evaluated:

    * q = 1:      ``-sum(p * log10(p))`` over pixels with ``p > 0``
    * q = 2..5:   ``log10(sum(p ** q)) / (1 - q)``

    Bins that contain no object get an entropy of 0 for every order.

    Parameters
    ----------
    input_file : str
        Path of the catalogue to analyse.
    output_file : str
        Path of the table to write: tab-separated, no header, one row per bin
        with the bin's upper radius followed by the five entropies.
    rmin, rmax : float
        Lower and upper end of the radial range.
    Nside : int, optional
        HEALPix resolution; defaults to the module-level ``nside``.
    nbin : int, optional
        Number of radial bins.
    """
    Npix = hp.nside2npix(Nside)

    df = pd.read_csv(input_file, sep="\t", header=None)
    df.columns = ['r', 'th', 'ph']
    r1 = df['r'].to_numpy()
    th1 = df['th'].to_numpy()
    ph1 = df['ph'].to_numpy()

    dr = (rmax - rmin) / nbin

    # Upper radius of every cumulative bin. Floating-point rounding can leave
    # rmin + nbin * dr a hair below rmax, which would silently drop the
    # farthest object from the outermost bin, so that edge is pinned to rmax.
    R = rmin + dr * np.arange(1, nbin + 1)
    R[-1:] = rmax

    # Only objects inside [rmin, rmax] can fall in any bin; the pixel of each
    # of them is computed once instead of once per (object, bin) pair.
    inside = (r1 >= rmin) & (r1 <= rmax)
    r_in = r1[inside]

    m = np.zeros((nbin, Npix), dtype=float)   # per-bin, per-pixel counts
    Neff = np.zeros(nbin, dtype=int)          # objects per bin
    if r_in.size:
        pix = hp.ang2pix(Nside, th1[inside], ph1[inside])
        for j in range(nbin):
            in_bin = r_in <= R[j]
            m[j] = np.bincount(pix[in_bin], minlength=Npix)
            Neff[j] = np.count_nonzero(in_bin)

    # Pixel probabilities; rows of empty bins stay at zero.
    p = np.zeros((nbin, Npix), dtype=float)
    filled = Neff > 0
    p[filled] = m[filled] / Neff[filled, np.newaxis]

    H = np.zeros((nbin, 5), dtype=float)

    # q = 1: empty pixels are mapped to log10(1) = 0 so they add nothing.
    # "0.0 - ..." (rather than unary minus) keeps an all-zero sum at +0.0.
    log_p = np.log10(np.where(p > 0, p, 1.0))
    H[:, 0] = 0.0 - (p * log_p).sum(axis=1)

    # q = 2..5: leave H at 0 where the power sum vanishes (empty bin).
    for q in range(2, 6):
        h = (p ** q).sum(axis=1)
        positive = h > 0
        H[positive, q - 1] = np.log10(h[positive]) / (1 - q)

    table = pd.DataFrame(np.column_stack([R, H]))
    table.to_csv(output_file, sep='\t', header=False, index=False)

#########################################################################################

# Radial extent (largest / smallest r) of every masked sample.
R_max = np.zeros(no_samp)
R_min = np.zeros(no_samp)

for n in range(no_samp):
    f_in = f'../Data_prep_masking/mask1_err/masked_sample_{n+1}.dat'
    df = pd.read_csv(f_in, sep="\t", header=None)
    df.columns = ['r', 'th', 'ph']
    R_max[n], R_min[n] = df['r'].max(), df['r'].min()

    # One (input, output, rmin, rmax) tuple per randomized realization.
    tasks = []
    for l in range(nf):
        f_samp = f'Randomized_data/s{n+1}_rand{l+1}.dat'
        f_out = f'inter_data2/anis_s{n+1}_rand{l+1}.dat'
        tasks.append((f_samp, f_out, R_min[n], R_max[n]))

    # Fan the entropy computation out over the worker processes.
    Parallel(n_jobs=n_jobs, backend='loky')(
        delayed(compute_renyi_entropy)(*task) for task in tasks
    )

##########################################################################################

# Normalized entropy dispersion and its error estimate.
# For every realization and radial bin: population standard deviation of the
# five entropy columns divided by their mean. Across the nf realizations the
# per-bin mean and sample standard deviation are then written out.
nbin = 30  # rows expected in every inter_data2/anis_s*_rand*.dat file
for n in range(no_samp):
    criteria = np.zeros((nf, nbin))
    mean_crit = np.zeros(nbin)
    d_crit = np.zeros(nbin)
    for f in range(nf):
        file = 'inter_data2/anis_s' + str(n+1) + '_rand' + str(f+1) + '.dat'
        table = np.loadtxt(file)  # parse the file once, then slice columns
        RR = table[:, 0]
        a1, a2, a3, a4, a5 = (table[:, c] for c in range(1, 6))

        # Same arithmetic as a per-bin loop, applied to whole columns.
        a_mean = (a1 + a2 + a3 + a4 + a5) / 5.0
        stab_cri = np.sqrt((1/5) * ((a1 - a_mean)**2 + (a2 - a_mean)**2
                                    + (a3 - a_mean)**2 + (a4 - a_mean)**2
                                    + (a5 - a_mean)**2))

        # Raises ValueError if the file does not hold exactly nbin rows.
        criteria[f] = stab_cri / a_mean

    for i in range(nbin):
        # statistics.mean / stdev are kept on purpose so the averaged values
        # (and the handling of non-finite entries) stay as before.
        list_crit = criteria[:, i].tolist()
        mean_crit[i] = mean(list_crit)
        d_crit[i] = stdev(list_crit)

    # RR comes from the last realization read for this sample.
    dict1 = {'r': RR, 'crit': mean_crit, 'sd': d_crit}
    fd = pd.DataFrame(dict1)
    f_name1 = 'inter_data2/sample_rand_' + str(n+1) + '_criteria.csv'
    fd.to_csv(f_name1, index=False)
    
############################################################################################## 

# Clean-up: delete the per-realization entropy tables and then the whole
# directory of randomized catalogues.
import shutil

for n in range(no_samp):
    for l in range(nf):
        file_name = f'inter_data2/anis_s{n+1}_rand{l+1}.dat'
        os.remove(file_name)

shutil.rmtree('Randomized_data')
   






  

