In [4]:
import numpy as np
import sklearn.neighbors as sk
import pickle
import time

In [5]:
# Directory holding the pickled input catalogues. The commented line is an
# alternative location kept for reference.
#path="./../../../DLA Mock Catalogue/"
path = "./../Data/"

# Catalogue file names; the driver loop appends "_pickled" to each before loading.
names = [
    "DLAhost_snap49_r1_b1 (copy).txt",
    "DLAhost_snap49_r1_b1alpha (copy).txt",
    "DLAhost_snap49_r1_b1T10 (copy).txt",
]

boxsize = 150.0
numPart = 1024.0  # The number of particles is the cube of this number

# Force resolution: taken as 1/30th of the mean inter-particle spacing, the
# usual choice to avoid diverging values.
forceRes = boxsize / numPart / 30

# Largest separation that gets binned. The usual recommendation is 1/5th to
# 1/10th of the box size, for two reasons:
# - The box is periodic, so any separation larger than L/2 would require
#   accessing periodic images beyond the current box.
# - The largest-wavelength mode available in the box has K_min = 2*pi/boxlength,
#   and modes near that limit are constrained by the size of the simulation box,
#   which motivates a cutoff of roughly L/(5 to 10).
# This run is deliberately more permissive and uses a factor of 4.
maxLength = boxsize / 4.0

# Smallest separation that gets binned: ten times the force resolution.
minLength = 10.0 * forceRes

# Added to the pair counts so that the DD/RR ratio never becomes 0/0.
eps = 1.0

In [6]:
def createBins(dR=2.0,mode='log',numOfBins=30):
    """Build the separation bin edges spanning minLength to maxLength.

    The range limits are read from the module-level ``minLength`` and
    ``maxLength`` values.

    Parameters
    ----------
    dR : float
        Spacing between consecutive edges; only used when ``mode == 'lin'``.
    mode : str
        ``'log'`` for edges evenly spaced in log10(separation), ``'lin'`` for
        edges spaced by ``dR``.
    numOfBins : int
        Number of edges generated in ``'log'`` mode (i.e. ``numOfBins - 1``
        intervals); ignored in ``'lin'`` mode.

    Returns
    -------
    numpy.ndarray
        1-D array of bin edges in increasing order.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'log'`` nor ``'lin'``.
    """
    if mode not in ('log', 'lin'):
        # Raise instead of calling exit(): terminating the interpreter from a
        # helper would kill the whole notebook kernel / calling process.
        raise ValueError("Issue with binning mode: expected 'log' or 'lin', got %r" % (mode,))

    if mode == 'lin':
        edges = np.arange(minLength, maxLength, dR)
        # Drop the last edge: it caused problems in the companion cpp
        # computation and is unimportant because its result is 0.
        # Slice the array itself; slicing a list that wraps the array would
        # discard every edge.
        return edges[:-1]

    # Log mode: numOfBins exponents evenly spaced between 0 and
    # log10(maxLength/minLength), so the first edge is minLength and the last
    # edge is maxLength.
    limit = np.log10(maxLength / minLength)
    marks = np.linspace(0.0, limit, numOfBins)
    return minLength * np.power(10, marks)

In [9]:
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html 
def correlationPP(pos,bins):
    """Return per-bin pair counts of ``pos`` against itself.

    A KDTree is built on ``pos`` and queried with the same points via
    ``two_point_correlation``, which yields one count per entry of ``bins``.
    Those counts are treated as cumulative, so differencing adjacent entries
    gives the count falling between consecutive bin edges.

    Parameters
    ----------
    pos : array-like
        Point coordinates, passed unchanged to ``sk.KDTree``.
    bins : array-like
        Separation edges, passed unchanged to ``two_point_correlation``.

    Returns
    -------
    numpy.ndarray
        Array with ``len(bins) - 1`` entries: count at edge i+1 minus count at
        edge i.
    """
    tree = sk.KDTree(pos)
    cumulative = np.array(tree.two_point_correlation(pos, bins))
    # Upper-edge count minus lower-edge count for every adjacent pair of edges.
    return cumulative[1:] - cumulative[:-1]

In [13]:
# Reference to timing: https://stackoverflow.com/questions/7370801/measure-time-elapsed-in-python
# Reference to random position generation : https://docs.scipy.org/doc/numpy-1.14.1/reference/generated/numpy.random.rand.html#numpy.random.rand
def correlation(pos,dR=2.0,seed=None):
    """Estimate the two-point correlation function of ``pos`` as DD/RR - 1.

    A uniform random catalogue with the same number of points as ``pos`` is
    drawn inside a cube of side ``boxsize``. Pair counts for the data (DD) and
    for the random catalogue (RR) are obtained with ``correlationPP`` on 30
    log-spaced edges from ``createBins``; ``eps`` is added to both counts so
    the ratio never becomes 0/0.

    Parameters
    ----------
    pos : array-like
        Data positions; ``len(pos)`` sets the size of the random catalogue.
    dR : float
        Currently unused (the bins are always logarithmic); kept so existing
        callers that pass it keep working.
    seed : int or None
        Seed for the random catalogue. ``None`` (default) draws from numpy's
        global random state, so results differ from run to run unless that
        state was seeded by the caller; an integer makes the random catalogue
        reproducible.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Arithmetic midpoints of adjacent bin edges, and the correlation value
        for each of those bins.
    """
    bins=createBins(mode="log",numOfBins=30)
    print("Bins created successfully")

    if seed is None:
        # Global generator: honours any np.random.seed() set by the caller.
        rpos=np.random.rand(len(pos),3)*boxsize
    else:
        # Private generator: reproducible without touching the global state.
        rpos=np.random.RandomState(seed).rand(len(pos),3)*boxsize
    print("Random distribution generated successfully")

    start=time.time()
    # eps keeps the ratio finite when a bin contains no pairs.
    n_rr=np.array(correlationPP(rpos,bins),dtype='double')+eps
    n_dd=np.array(correlationPP(pos,bins),dtype='double')+eps

    # Each interval is represented by the arithmetic mean of its two edges.
    centeredBins=bins[:-1]*0.5+bins[1:]*0.5
    correlationValue_KDT=(n_dd/n_rr)-1.0
    end=time.time()
    print("Computed 2pt Correlation using KDTree in "+str(end-start)+" seconds")

    return centeredBins,correlationValue_KDT

In [16]:
# Reference : https://stackoverflow.com/questions/6159900/correct-way-to-write-line-to-file
def writeFile(bins,corrs,index):
    """Write separation/correlation pairs to a text file in the working directory.

    The output file is named ``"KDTree " + names[index]``. It starts with two
    ``#`` header lines (the source file name and the column titles), followed
    by one ``separation value`` line per pair taken from ``zip(bins, corrs)``.

    Parameters
    ----------
    bins : iterable
        Separation values, written as the first column.
    corrs : iterable
        Correlation values, written as the second column.
    index : int
        Position in the module-level ``names`` list of the input file that the
        results belong to.
    """
    source_name = names[index]
    header_lines = (
        "# KDTree 2 point correlation for input file:" + source_name + "\n",
        "# Separation[R]     Correlation Value[Xi(R)]\n",
    )
    with open("KDTree " + source_name, 'w') as out:
        out.writelines(header_lines)
        out.writelines(
            str(separation) + " " + str(value) + "\n"
            for separation, value in zip(bins, corrs)
        )
    print("Written to file")
    print("\n")

In [17]:
# For every catalogue in `names`: load its pickled positions, compute the
# two-point correlation, and write the result to a text file.
for i in range(0,len(names)):
    fname=path+names[i]+"_pickled"
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use it on files from a trusted source.
    # The with-block guarantees the file handle is closed once loading finishes,
    # even if unpickling raises.
    with open(fname,'rb') as pickled:
        pos=pickle.load(pickled)
    b,c=correlation(pos)
    writeFile(b,c,i)

Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 0.9146244525909424 seconds
Written to file


Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 0.11552858352661133 seconds
Written to file


Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 3.4143781661987305 seconds
Written to file


