## ABC applied to position reconstruction

*Bart Pelssers, 26-02-2018*

This notebook provides some ingredients for applying the ABC algorithm to position reconstruction, the most basic case. Just to test the framework.


* Provides:
  * prior mean
  * forward model
  * summary statistic
  
*Umberto Simola, 22-03-2018*

Provided the ABC-PMC algorithm for running the analyses.

In [1]:
import numpy as np

In [6]:
from abc_reconstruction.simpleModel import SimpleModel as Model
from abc_reconstruction.utils import Generator, PriorPosition

In [7]:
# Setup the Models
model = Model()
prior_mean = PriorPosition()
generator = Generator(model)

In [8]:
# Example pattern from x=2.6264, y=-17.96082

# The range of x and y is [-47.884375 cm, -47.884375 cm]
# But x**2 + y**2 < 47.884375**2
# Otherwise m() will raise and exception

pattern = model(2.6264, -17.96082)

print("Length of pattern: %d, Sum of pattern: %.2f" % (len(pattern), pattern.sum()))
print(pattern)

Length of pattern: 127, Sum of pattern: 500.00
[  0.14841595   0.14397198   0.13988137   0.24159058   0.18933869
   0.22591982   0.20805915   0.23506457   0.31582144   0.28612889
   0.33985588   0.38021411   0.48340495   0.56819242   0.67613899
   0.8470959    1.0991352    1.25354921   1.30006622   1.43827737
   1.17062482   1.07080343   0.89733495   0.78772861   0.62917211
   0.40026487   0.44570271   0.32362463   0.301934     0.27624214
   0.28848058   0.21737252   0.28141832   0.23859565   0.16602611
   0.21634046   0.36649959   0.47636725   0.44341384   0.45075319
   0.5022231    0.53645639   0.5805464    0.68153719   0.71537357
   0.79270054   0.91853777   1.15995621   1.30145776   1.76802897
   2.17324897   2.91035191   2.87717576   2.39581913   1.79414895
   1.44549592   1.05471978   0.81379478   0.74454349   0.69864421
   0.590455     0.62706968   0.5057492    0.42780855   0.41207846
   0.43831424   0.58574815   0.56964808   0.68396244   0.58466108
   0.61196534   0.84590827   

In [9]:
# If using a 2D Normal prior this would be a good guess for the mean (x,y) of that prior.
prior_mean(pattern)

(4.115222817130077, -15.358220637996187)

In [10]:
###Function for sampling proposal coordinates from the prior: Normal Centered on the naiveCoord, sd=15
def priorFunction(coord):
    coordX = coord[0]
    coordY = coord[1]
    xProp=np.random.normal(coordX,15,1)[0]
    yProp=np.random.normal(coordY,15,1)[0]
    while xProp**2+yProp**2>47.884375**2:
        xProp=np.random.normal(coordX,15,1)[0]
        yProp=np.random.normal(coordY,15,1)[0]
    return xProp,yProp
#priorFunction(naiveCoord)

In [11]:
###Distance function for comparing the real and the simulated dataset
def rho(x,y):
    return sum((x-y)**2)/np.shape(x)[0]
    #return sum(np.abs(x-y))/np.shape(x)[0]
#print(rho(event,SimulatedModel))

In [12]:
###Load the true position matrix for generating the Observed dataset

###For sure you can load only the positions from the original files but I got errors when trying to call the data

trueCoordMatrix=np.loadtxt('data/truepos')
#print(trueCoordMatrix[0])
#np.shape(trueCoordMatrix)

In [13]:
###ABC-SMC Definition of the necessary quantities

In [14]:
###Transformation kernel for resampling for t>0 (i.e. rather the using the prior we use this kernel)
def transfKernel(propx0,propy0,varx,vary):
    propx=np.random.normal(propx0,2*varx,1)[0]
    propy=np.random.normal(propy0,2*vary,1)[0]
    while propx**2+propy**2 > 47.884375**2:
        propx=np.random.normal(propx0,2*varx,1)[0]
        propy=np.random.normal(propy0,2*vary,1)[0]
    return propx,propy 

In [15]:
###ABC-SMC on a single selected event event j

In [16]:
import scipy.stats

###Number of particles for recunstructing the posterior for each coordinate
N=5000

###Number of iterations before stopping the algorithm
nIter=40

###Quantile used for shrinking the tolerances through the iterations
quantile=0.85

###Importance weigths for the ABC-PMC: because for t>0 we using a kernel and not the prior as proposal distribution
weights=np.zeros((nIter,N))
weights[0,:]=1/N

###Accepted elements are stored here
###abcCooord x
abcCoordsx=np.zeros((nIter,N))
###abcCooord y
abcCoordsy=np.zeros((nIter,N))

###Distance of accepted elements are stored here
d=np.zeros((nIter,N))

###First tolerance epsilon1
epsilon=4

###Total number of draws required for covering the entire analyses
totDraws=0

#########################################################################
#########################################################################
###pick the event j
j=0

###given an event, we need the true coordinates of this event and getting the 'true' dataset. The trueDataset called here
###TrueModel
trueCoord = trueCoordMatrix[j] 
TrueModel=model(trueCoord[0], trueCoord[1], 500)
###means of the priors
naiveCoord=prior_mean(TrueModel)

###Simulated accepted dataset from the last iteration
simAccData=np.zeros((N,np.shape(TrueModel)[0]))
    
for t in range(0,nIter):
    print(t)
    if(t==0):
        for i in range(0,N):
            d[t,i]=epsilon+1
            while d[t,i] > epsilon: 
                propCoord = priorFunction(naiveCoord)
                ###Deterministic FM
                #simulatedData = model(propCoord[0], propCoord[1], 500)
                ###Stochastic FM, for now adding gaussian noise (mean=0,sd=1)
                simulatedData = model(propCoord[0], propCoord[1], 500)+np.random.normal(0,1,np.shape(TrueModel)[0])
                totDraws = totDraws+1
                d[t,i] = rho(TrueModel,simulatedData) 
            abcCoordsx[t,i]=propCoord[0]
            abcCoordsy[t,i]=propCoord[1]
    else:
        epsilon= np.percentile(d[t-1,],quantile*100)
        print(epsilon)
        meanx=np.sum(abcCoordsx[t-1,:]*weights[t-1,:])
        varx=np.sum((abcCoordsx[t-1,:]-meanx)**2*weights[t-1,:])
        meany=np.sum(abcCoordsy[t-1,:]*weights[t-1,:])
        vary=np.sum((abcCoordsy[t-1,:]-meany)**2*weights[t-1,:])
        for i in range(0,N):
            d[t,i]=epsilon+1
            while d[t,i] > epsilon: 
                sample=np.random.choice(N,1,p=weights[t-1,:])
                propx0=abcCoordsx[t-1,sample]
                propy0=abcCoordsy[t-1,sample]
                prop=transfKernel(propx0,propy0,varx,vary)
                propx=prop[0]
                propy=prop[1]
                ###Deterministic FM
                #simulatedData = model(propx, propy, 500)
                ###Stochastic FM, for now adding gaussian noise (mean=0,sd=1)
                simulatedData = model(propx, propy, 500)+np.random.normal(0,1,np.shape(TrueModel)[0])
                totDraws = totDraws+1
                d[t,i] = rho(TrueModel,simulatedData)
            simAccData[i,:]=simulatedData
            abcCoordsx[t,i]=propx
            abcCoordsy[t,i]=propy 
            weightsDen=np.sum(weights[t-1,:]*scipy.stats.norm.pdf(propx,abcCoordsx[t-1,:],np.sqrt(2*varx))*scipy.stats.norm.pdf(propy,abcCoordsy[t-1,:],np.sqrt(2*vary)))
            weightsNum=scipy.stats.norm.pdf(propx,propCoord[0],10)*scipy.stats.norm.pdf(propy,propCoord[1],10)     
            weights[t,i]=weightsNum/weightsDen
    weights[t,:]=weights[t,:]/sum(weights[t,:])
print(totDraws)
abcOutput=np.column_stack((abcCoordsx[nIter-1,:],abcCoordsy[nIter-1,:]))
np.savetxt('abcPosteriorCoords_%d.dat'%(j), abcOutput)
importanceWeigths=weights[nIter-1,:]
np.savetxt('WeightsabcPosteriorCoords_%d.dat'%(j), importanceWeigths)
np.savetxt('simAccData_%d.dat'%(j), simAccData)
np.savetxt('TrueData_%d.dat'%(j), TrueModel)

0


KeyboardInterrupt: 

In [17]:
###I pick randomly 10 elements for the analyses
elements=np.random.choice(np.shape(trueCoordMatrix)[0],1)
print(elements)

[3246]


In [18]:
import scipy.stats
###ABCPMC for all the events or a random selection among the possbilities
#for j in np.random.choice(np.shape(trueCoordMatrix)[0],10):
#for j in range(0,1): 
for j in elements:
    print(j)
    N=5000
    nIter=40
    quantile=0.85
    epsilon=4
    ###Importance weigths for the ABC-PMC
    weights=np.zeros((nIter,N))
    weights[0,:]=1/N
    ###abcCooord x
    abcCoordsx=np.zeros((nIter,N))
    ###abcCooord y
    abcCoordsy=np.zeros((nIter,N))
    ###Distance
    d=np.zeros((nIter,N)) 
    
    trueCoord = trueCoordMatrix[j]
    TrueModel=model(trueCoord[0], trueCoord[1], 500)
    naiveCoord=prior_mean(TrueModel)
    totDraws=0
    t=0
    for t in range(0,nIter):
        #print(t)
        if(t==0):
            for i in range(0,N):
                d[t,i]=epsilon+1
                while d[t,i] > epsilon: 
                    propCoord = priorFunction(naiveCoord)
                    ###Deterministic FM
                    #simulatedData = model(propCoord[0], propCoord[1], 500)
                    ###Stochastic FM, for now adding gaussian noise (mean=0,sd=1)
                    simulatedData = model(propCoord[0], propCoord[1], 500)+np.random.normal(0,1,np.shape(TrueModel)[0])
                    totDraws = totDraws+1
                    d[t,i] = rho(TrueModel,simulatedData)
                abcCoordsx[t,i]=propCoord[0]
                abcCoordsy[t,i]=propCoord[1]
        else:
            epsilon= np.percentile(d[t-1,],quantile*100)
            #print(epsilon)
            meanx=np.sum(abcCoordsx[t-1,:]*weights[t-1,:])
            varx=np.sum((abcCoordsx[t-1,:]-meanx)**2*weights[t-1,:])
            meany=np.sum(abcCoordsy[t-1,:]*weights[t-1,:])
            vary=np.sum((abcCoordsy[t-1,:]-meany)**2*weights[t-1,:])
            for i in range(0,N):
                d[t,i]=epsilon+1
                while d[t,i] > epsilon: 
                    sample=np.random.choice(N,1,p=weights[t-1,:])
                    propx0=abcCoordsx[t-1,sample]
                    propy0=abcCoordsy[t-1,sample]
                    prop=transfKernel(propx0,propy0,varx,vary)
                    propx=prop[0]
                    propy=prop[1]
                    ###Deterministic FM
                    #simulatedData = model(propx, propy, 500)
                    ###Stochastic FM, for now adding gaussian noise (mean=0,sd=1)
                    simulatedData = model(propx, propy, 500)+np.random.normal(0,1,np.shape(TrueModel)[0])
                    totDraws = totDraws+1
                    d[t,i] = rho(TrueModel,simulatedData)
                abcCoordsx[t,i]=propx
                abcCoordsy[t,i]=propy
                weightsDen=np.sum(weights[t-1,:]*scipy.stats.norm.pdf(propx,abcCoordsx[t-1,:],np.sqrt(2*varx))*scipy.stats.norm.pdf(propy,abcCoordsy[t-1,:],np.sqrt(2*vary)))
                weightsNum=scipy.stats.norm.pdf(propx,propCoord[0],10)*scipy.stats.norm.pdf(propy,propCoord[1],10)     
                weights[t,i]=weightsNum/weightsDen
        weights[t,:]=weights[t,:]/sum(weights[t,:])
    ###here we save the coordinated saved in the last iteration, and corresponding weights.
    abcOutput=np.column_stack((abcCoordsx[nIter-1,:],abcCoordsy[nIter-1,:]))
    np.savetxt('abcPosteriorCoords_%d.dat'%(j), abcOutput)
    importanceWeigths=weights[nIter-1,:]
    np.savetxt('WeightsabcPosteriorCoords_%d.dat'%(j), importanceWeigths)
    print(epsilon)

3246
0.875631205344
