In [2]:
import numpy as np
import sklearn.neighbors as sk
import pickle
import time

In [1]:
# Directory prefix used when building the pickled catalogue file names.
path = "./../../../DLA Mock Catalogue/"
# Secondary data directory (not referenced by the cells visible in this notebook).
path2 = "./../Data/"

# Catalogue files to process, in order; also used to name the output files.
names = [
    "DLAhost_snap49_r1_b1 (copy).txt",
    "DLAhost_snap49_r1_b1alpha (copy).txt",
    "DLAhost_snap49_r1_b1T10 (copy).txt",
]

# Side length of the cube: scales the random positions and sets the longest separation.
boxsize = 150.0

# Epsilon added to both pair counts so the DD/RR ratio never becomes 0/0.
eps = 1.0

In [4]:
def createBins(dR=2.0, boxLength=None):
    """Return the separation bin edges used for the pair counts.

    The edges are spaced ``dR`` apart and cover the range up to the space
    diagonal of a cube with side ``boxLength`` (``boxLength * sqrt(3)``).
    The part of the diagonal that does not fit a whole number of bins is
    split evenly, so the first edge sits at half of that remainder.

    Parameters
    ----------
    dR : float, optional
        Width of every bin. Must be strictly positive.
    boxLength : float, optional
        Side length of the cube. When omitted, the notebook-level
        ``boxsize`` constant is used, which is the original behaviour.

    Returns
    -------
    numpy.ndarray
        1-D array of increasing bin edges.

    Raises
    ------
    ValueError
        If ``dR`` is not strictly positive.
    """
    if boxLength is None:
        # Fall back to the notebook-wide box size so existing calls are unchanged.
        boxLength = boxsize
    if not dR > 0:
        # A zero, negative, or NaN width would otherwise yield an empty array
        # or an unrelated overflow error further down.
        raise ValueError("dR must be a positive bin width, got " + repr(dR))

    maxLength = boxLength * np.sqrt(3)
    nBins = int(maxLength / dR)
    offset = (maxLength - (dR * nBins)) / 2.0
    bins = np.arange(0.0 + offset, maxLength, dR)
    # The last edge is dropped on purpose: the original author notes it was
    # unreliable in the companion C++ computation and carries no signal anyway.
    return bins[:-1]

In [5]:
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html 
def correlationPP(pos,bins):
    """Count point pairs of ``pos`` falling between consecutive ``bins`` edges.

    A KD-tree is built on ``pos`` and queried against the same points with
    ``two_point_correlation``, which (per the scikit-learn documentation)
    returns one cumulative pair count per radius in ``bins``. Differencing
    neighbouring entries converts those cumulative counts into per-bin counts.

    Parameters
    ----------
    pos : array-like
        Point coordinates accepted by ``KDTree``
        (presumably shape ``(n_points, 3)`` -- confirm against the caller).
    bins : array-like
        Increasing radii at which the cumulative counts are evaluated.

    Returns
    -------
    numpy.ndarray
        Array with ``len(bins) - 1`` entries, one per interval between edges.
    """
    tree = sk.KDTree(pos)
    cumulative = np.asarray(tree.two_point_correlation(pos, bins))
    # Equivalent to cumulative[1:] - cumulative[:-1].
    return np.diff(cumulative)

In [6]:
# Reference to timing: https://stackoverflow.com/questions/7370801/measure-time-elapsed-in-python
# Reference to random position generation : https://docs.scipy.org/doc/numpy-1.14.1/reference/generated/numpy.random.rand.html#numpy.random.rand
def correlation(pos,dR=2.0,seed=None):
    """Estimate the two-point correlation of ``pos`` as DD/RR - 1.

    A uniform random catalogue with as many points as ``pos`` is drawn inside
    the cube ``[0, boxsize)^3``. Pair counts of the data (DD) and of the
    random catalogue (RR) are computed per separation bin, ``eps`` is added
    to both to avoid a 0/0 ratio, and the estimate is ``DD / RR - 1``.

    Parameters
    ----------
    pos : sequence of points
        Data positions; ``len(pos)`` sets the size of the random catalogue.
    dR : float, optional
        Bin width forwarded to ``createBins``.
    seed : int or None, optional
        When given, the random catalogue is drawn from a dedicated
        ``numpy.random.RandomState(seed)`` so repeated runs give identical
        results. When ``None`` (default) the global NumPy random state is
        used, exactly as before.

    Returns
    -------
    centeredBins : numpy.ndarray
        Midpoint of each pair of consecutive bin edges.
    correlationValue_KDT : numpy.ndarray
        Correlation estimate for each of those bins.
    """
    bins=createBins(dR)
    print("Bins created successfully")

    if seed is None:
        rpos=np.random.rand(len(pos),3)*boxsize
    else:
        # Private generator: reproducible without touching the global state.
        rpos=np.random.RandomState(seed).rand(len(pos),3)*boxsize
    print("Random distribution generated successfully")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() can.
    start=time.perf_counter()
    n_rr=np.array(correlationPP(rpos,bins),dtype='double')+eps
    n_dd=np.array(correlationPP(pos,bins),dtype='double')+eps

    centeredBins=bins[:-1]*0.5+bins[1:]*0.5
    correlationValue_KDT=(n_dd/n_rr)-1.0
    end=time.perf_counter()
    print("Computed 2pt Correlation using KDTree in "+str(end-start)+" seconds")

    return centeredBins,correlationValue_KDT

In [1]:
# Reference : https://stackoverflow.com/questions/6159900/correct-way-to-write-line-to-file
def writeFile(bins,corrs,index):
    """Write the correlation table for catalogue ``names[index]`` to a text file.

    The file is named ``"PyOUT " + names[index]`` and is created in the
    current working directory, overwriting any existing file. It holds two
    ``#`` header lines followed by one ``separation value`` line per bin.

    Parameters
    ----------
    bins : iterable
        Separation values, written as the first column.
    corrs : iterable
        Correlation values, written as the second column; rows are paired
        with ``bins`` via ``zip``, so the shorter input sets the row count.
    index : int
        Position in the module-level ``names`` list of the catalogue the
        results belong to.
    """
    source = names[index]
    with open("PyOUT " + source, 'w') as out:
        out.write("# KDTree 2 point correlation for input file:" + source + "\n")
        out.write("# Separation[R]     Correlation Value[Xi(R)]\n")
        # Rows are generated lazily, one "<separation> <xi>" line per bin.
        out.writelines(
            str(sep) + " " + str(xi) + "\n" for sep, xi in zip(bins, corrs)
        )
    print("Written to file")

In [7]:
# Run the pipeline for every catalogue: load the pickled positions,
# compute the two-point correlation, and write the result table.
for i, catalogue in enumerate(names):
    fname = path + catalogue + "_pickled"
    # NOTE: pickle.load can execute arbitrary code while unpickling;
    # only load catalogue files from a trusted source.
    with open(fname, 'rb') as fh:
        # The context manager closes the handle, which the previous
        # pickle.load(open(...)) form left open.
        pos = pickle.load(fh)
    b, c = correlation(pos)
    writeFile(b, c, i)

Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 4.526522159576416 seconds
Written to file
Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 0.4121212959289551 seconds
Written to file
Bins created successfully
Random distribution generated successfully
Computed 2pt Correlation using KDTree in 19.751514673233032 seconds
Written to file
