In [1]:
import numpy as np
import pandas as pd
import healpy as hp
from statistics import stdev

nside = 8   # this is for masking 1 and masking 2

# Compute Rényi entropies for q = 1 to 5

def compute_renyi_entropy(input_file, output_file, rmin, rmax, Nside=nside, nbin=30):
    """Write the Rényi entropies (q = 1..5) of the sky-pixel distribution vs. radius.

    The catalogue in ``input_file`` is read as a header-less, tab-separated
    table whose three columns are named ``r``, ``th`` and ``ph``.  Each object
    is mapped to a HEALPix pixel with ``hp.ang2pix(Nside, th, ph)``.

    The radial bins are cumulative: bin ``j`` contains every object with
    ``rmin <= r <= rmin + (j + 1) * dr`` where ``dr = (rmax - rmin) / nbin``.
    In each bin the pixel counts are normalised to probabilities ``p`` and

        H_1 = -sum(p * log10(p))                  (terms with p == 0 skipped)
        H_q = log10(sum(p ** q)) / (1 - q)        for q = 2, 3, 4, 5

    are evaluated.  A bin without objects keeps entropy 0 for every q.

    Parameters
    ----------
    input_file : str
        Path of the tab-separated catalogue (columns r, th, ph; no header).
        NOTE(review): ``th``/``ph`` are handed to ``hp.ang2pix`` unchanged, so
        they presumably are colatitude and longitude in radians -- confirm
        against the data-preparation step.
    output_file : str
        Path of the result table: tab-separated, no header, ``nbin`` rows, each
        holding the upper radius of the bin followed by H_1 .. H_5.
    rmin, rmax : float
        Inner radius of all bins and outer radius of the last bin.
    Nside : int, optional
        HEALPix resolution parameter (defaults to the module-level ``nside``).
    nbin : int, optional
        Number of cumulative radial bins.

    Raises
    ------
    ValueError
        If ``nbin`` is not positive.
    """
    if nbin <= 0:
        raise ValueError("nbin must be a positive integer")

    Npix = hp.nside2npix(Nside)

    # Load data
    catalogue = pd.read_csv(input_file, sep="\t", header=None)
    catalogue.columns = ['r', 'th', 'ph']
    r1 = catalogue['r'].to_numpy()
    th1 = catalogue['th'].to_numpy()
    ph1 = catalogue['ph'].to_numpy()

    # Upper edge of every cumulative bin.  The last edge is pinned to rmax:
    # rmin + nbin * dr can round to just below rmax, which would silently drop
    # the object sitting exactly at rmax from the outermost bin.
    dr = (rmax - rmin) / nbin
    edges = rmin + np.arange(1, nbin + 1) * dr
    edges[-1] = rmax

    # The pixel of an object does not depend on the radial bin, so it is
    # computed once, and only for objects that can enter at least one bin.
    in_range = (r1 >= rmin) & (r1 <= edges[-1])
    r_sel = r1[in_range]
    if r_sel.size:
        pix = np.atleast_1d(hp.ang2pix(Nside, th1[in_range], ph1[in_range]))
    else:
        pix = np.zeros(0, dtype=np.intp)

    # Pixel counts per cumulative radial bin
    m = np.zeros((nbin, Npix), dtype=float)
    for j, edge in enumerate(edges):
        m[j] = np.bincount(pix[r_sel <= edge], minlength=Npix)

    # Normalise to probabilities; empty bins are left at zero
    Neff = m.sum(axis=1)
    p = np.divide(m, Neff[:, None], out=np.zeros_like(m),
                  where=(Neff > 0)[:, None])

    H = np.zeros((nbin, 5), dtype=float)

    # q = 1: Shannon limit, pixels with p == 0 contribute nothing
    log_p = np.log10(p, out=np.zeros_like(p), where=p > 0)
    H[:, 0] -= (p * log_p).sum(axis=1)

    # q = 2..5: log10 of the q-th moment, defined only where the moment is > 0
    for q in range(2, 6):
        moment = (p ** q).sum(axis=1)
        filled = moment > 0
        H[filled, q - 1] = np.log10(moment[filled]) / (1 - q)

    # One row per bin: upper radius followed by the five entropies
    result = pd.DataFrame(np.column_stack([edges, H]))
    result.to_csv(output_file, sep='\t', header=False, index=False)
    
##########################################################################################

# Run for the three samples

# Number of bootstrap resamples written per sample.  The error-estimation and
# clean-up cells read and delete 100 files per sample (nf = 100), so the same
# number has to be produced here; producing fewer makes those cells fail on a
# missing file.
n_boot = 100

R_max = np.zeros(3)
R_min = np.zeros(3)

for n in range(3):
    f_in = '../data_prep/mask1/masked_sample_' + str(n+1) + '.dat'
    # f_in = '../data_prep/mask2/masked_sample_' + str(n+1) + '.dat'

    df = pd.read_csv(f_in, sep="\t", header=None)
    df.columns = ['r', 'th', 'ph']

    # Radial range of the full sample; reused for every resample so that all
    # output files share the same radial bins.
    R_max[n] = df['r'].max()
    R_min[n] = df['r'].min()

    # Entropies of the full sample
    f_out1 = 'renyi_anis_s' + str(n+1) + '.dat'
    compute_renyi_entropy(f_in, f_out1, R_min[n], R_max[n])

    # Bootstrap resamples: 80% of the objects drawn with replacement, seeded
    # with the resample index so that a re-run reproduces the same files.
    for l in range(n_boot):
        fd = df.sample(frac=0.8, replace=True, random_state=l)
        f_samp = 'sample' + str(n+1) + '_' + str(l+1) + '.dat'
        fd.to_csv(f_samp, sep="\t", header=False, index=False)
        f_out2 = 'anis_s' + str(n+1) + '_' + str(l+1) + '.dat'
        compute_renyi_entropy(f_samp, f_out2, R_min[n], R_max[n])

In [2]:
# error estimation
nf = 100     # bootstrap files read per sample; must equal the number written earlier
nbin = 30    # rows (radial bins) expected in every entropy file
n_cols = 6   # radius followed by the entropies for q = 1..5

for n in range(3):
    tag = str(n + 1)

    # Entropies measured on the full sample (column 0, the radius, is not used).
    full = np.loadtxt('renyi_anis_s' + tag + '.dat')
    full_entropy = [full[:, q] for q in range(1, n_cols)]

    # boot[f, i, c] holds column c of radial bin i in bootstrap file f.
    boot = np.zeros((nf, nbin, n_cols))
    for f in range(nf):
        table = np.loadtxt('anis_s' + tag + '_' + str(f + 1) + '.dat')
        for c in range(n_cols):
            boot[f, :, c] = table[:, c]

    # Radius column taken from the second bootstrap file (index 1); the
    # radii are assumed to be identical in every file.
    columns = {'r': boot[1, :, 0]}

    # Sample standard deviation over the bootstrap files, bin by bin.
    scatter = [
        np.array([stdev(boot[:, i, q]) for i in range(nbin)])
        for q in range(1, n_cols)
    ]

    for q in range(1, n_cols):
        columns['a' + str(q)] = full_entropy[q - 1]
    for q in range(1, n_cols):
        columns['std_a' + str(q)] = scatter[q - 1]

    pd.DataFrame(columns).to_csv('sample' + tag + '_entr_err.csv', index=False)

In [3]:
import os

# Delete the intermediate files: per sample, the full-sample entropy table plus
# the nf bootstrap catalogues and their entropy tables.
for n in range(3):
    leftovers = ['renyi_anis_s' + str(n+1) + '.dat']
    for l in range(nf):
        leftovers.append('anis_s' + str(n+1) + '_' + str(l+1) + '.dat')
        leftovers.append('sample' + str(n+1) + '_' + str(l+1) + '.dat')

    for path in leftovers:
        # Skip files that are already gone so the cell can be re-run (or run
        # after a partial earlier run) without raising FileNotFoundError.
        if os.path.exists(path):
            os.remove(path)

