In [8]:
%matplotlib ipympl
import random
import time
import zmq
import json
import numpy
import matplotlib.pyplot as plt
from origin.client import server, random_data
import pprint
from scipy.stats import poisson
import warnings

warnings.filterwarnings("error")

In [68]:
###maximun likelihood function for compound poisson distribution

# a=sigma0, b=sigma1, n=average atom number, t=1
import numpy
import math
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from scipy.stats import poisson
from scipy.stats import gamma


def pdd(xdata, a, b, n):
    max_atoms = 10
    x = numpy.arange(max_atoms+1)
    # generate a poisson probability density distribution of size centered at n
    p_atoms = poisson.pmf(x, n)
    #probability distribution of count based on parameters a,b,n
    p_prob = numpy.zeros_like(xdata)
    #intdata = xdata.astype(int)
    for k, pa in enumerate(p_atoms):
        p_prob += pa * poisson.pmf(xdata, a+k*b)
    return p_prob

def gdd(xdata, a, b, n):
    max_atoms = 1
    x = numpy.arange(max_atoms+1)
    # generate a poisson probability density distribution of size centered at n
    p_atoms = poisson.pmf(x, n)
    norm = numpy.sum(p_atoms)
    # gamma distribution
    g_prob = numpy.zeros_like(xdata, dtype='float')
    for k, pa in enumerate(p_atoms):
        # numpy.add(g_data, pa*gamma.pdf(xdata, a+k*b), out=g_data)
        g_prob += pa * gamma.pdf(xdata, a+k*b)/norm
    print(numpy.sum(g_prob))
    return g_prob

#a = background signal, b = signal, n = mean value of atoms number
#t = exposure time
# a, b, n=10, 40, 0.3
# raw_data = numpy.array([ numpy.random.poisson(a+k*b) for k in numpy.random.poisson(n,1500) ])

#curve_fit
#distribution function pbd
#x: bin edges of histogram of raw_data
#y: probability distribution of histogram of raw_data
def fit_para(func, raw_data, p0):
    #return a tuple with (distribution-y, bins edges-x)
    hist_data = numpy.histogram(raw_data, bins=int(numpy.sqrt(len(raw_data))), density=True) #return a tuple with (distribution-y, bins edges-x)
    # shift fitting point of histogram to middle
    bin_width = hist_data[1][1]-hist_data[1][0]
    fit_bins = hist_data[1][:-1] + 0.5*bin_width
    try:
        popt, pcov = curve_fit(func, fit_bins, hist_data[0], p0=p0)
        return popt
    except RuntimeError:
        popt, pcov = curve_fit(func, fit_bins, hist_data[0], p0=None)
        return popt


# plt.plot(x,p_dist, g_dist)
# plt.hist(raw_data, bins=hist_data[1], density=True)
# xdata = numpy.linspace(0,max(raw_data),500)
# plt.plot(xdata, gdd(xdata, *popt), label='fit curve')
# plt.plot(xdata, gdd(xdata, *(a,b,n)), label='fit curve')
# plt.show()

In [69]:
"""a function that returns difference of maximum likelihood to determine the prior for choosing single/double
    poisson fit function to fix the problem of wrong seperation when there only background and no loading rate.
    for a set of random poissonian data, fit to get difference between two max likelihood value and histogram.
    determine prior where delta L is larger then a value.
    """
class Bayesian(object):

    def __init__(self):
        self.difference = numpy.array([])

    def fitness_poisson(self, x):
        """max log likelihood for poisson distribution
            x: input data"""
        max_par = numpy.mean(x)  # maximum parameter for poisson distribution is mu
        log_max = numpy.log(poisson.pmf(x, max_par))
        return numpy.sum(log_max)

    def fitness_double_poisson(self, x, popt):
        """a function that returns optimal parameters via fit_para function; max log likelihood for input data
            x: input data
            num: number of data points/ it is used to get all possible block length in bayesian.
                can be motified by creating a new function that returns possible block length."""
        new_popt = fit_para(gdd, x, popt)
        log_max = numpy.array([numpy.log(gdd(d, *new_popt)) for d in x]) #log max likelihood for each data
        return new_popt, numpy.sum(log_max) #return array with [log max likelihood of all input data (n), n-1 input data, ... 1 (newest data)]

    def llh_dif(self, data_set, popt, size):
        """a function that compare max log likelihood values for single and double poisson distribution
            data_set: pre-generated n number of poisson distribution data
            popt: pre-determined optimal parameters
            size: number of data in data set"""
        for data in data_set:
            try:
                logfit_poisson = self.fitness_poisson(data)
                print(logfit_poisson)
                logfit_double_poisson = self.fitness_double_poisson(data, popt)[1]
                print(logfit_double_poisson)
                self.difference = numpy.append(self.difference, numpy.subtract(logfit_double_poisson, logfit_poisson))
            except Exception:
                print "No optimal parameters found"
                self.difference = numpy.append(self.difference, 0)
        print self.difference
        plt.hist(self.difference, bins = int(numpy.sqrt(size)), density = True)
        plt.show()

In [70]:
# Generating raw_data set function
def generate_data(size, mu, l):
    """function that generates random data set that fit poisson distribution
        size: number of data in data set
        mu: average in poisson distribution
        l: length of each data/ number of data points"""
    data_set = numpy.array([numpy.random.poisson(mu, l) for i in xrange(size)])
    return data_set

In [71]:
#Execution
#variables
a, b, n = 10, 2, 0.5
size = 10
#generate data set
data_set = generate_data(size, a, 500)
#call class functions
test = Bayesian()
test.llh_dif(data_set, [a, b, n], size)

-1288.9232483009803
1.04394492486263
1.04394492486263
1.04394492486263
1.0439449244373316
1.0439449248056254
1.043944924841578
nan
1.0470397583545548
1.0447378679104622
1.0447378675562005
1.0447378678815824
1.044737867898225
1.0451685026191617
1.0451685022977575
1.045168502611677
1.04516850261528
1.0450630756647399
1.0450630753206422
1.0450630756636679
1.0450630756654042
1.0449180238725624
1.0449965786174131
1.0449965782665738
1.0449965786164854
1.0449965786168707
1.0450132055014631
1.04501320515117
1.0450132055007544
1.0450132055018968
1.0450034048329957
1.0450034044822214
1.045003404832895
1.0450034048329355
1.045008152272694
1.0450085269281624
1.0450085265778954
1.0450085269280984
1.0450085269282008
1.0450072338322258
1.0450072655005966
1.0450072651502378
1.0450072655005886
1.045007265500592
1.0450080097089611
1.0450079633626776
1.0450078923478985
1.0450078919976065
1.045007892347892
1.045007892347903
1.045007478620173
1.0450074782698346
1.0450074786201724
1.0450074786201728
1.04500

DeprecationWarning: Passing unrecoginized arguments to super(FigureCanvasNbAgg).__init__().
__init__() takes exactly 2 arguments (1 given)
This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.

In [None]:
#test individual functions Ldp > Lsp

a, b, n=10, 40, 0.3
raw_double = numpy.array([ numpy.random.poisson(a+k*b) for k in numpy.random.poisson(0,1000) ]) #by double poisson with n=0
raw_poisson = numpy.random.poisson(10, 1000) #simple single poisson random with only background (a)
raw_data = numpy.array([ numpy.random.poisson(a+k*b) for k in numpy.random.poisson(0.3,1000) ])

fit_poisson = fitness_poisson(raw_poisson)
fit_double = fitness_double_poisson(raw_poisson, [a, b, 0])[1]

print fit_poisson
print fit_double
print fitness_double_poisson(raw_data, [a,b,n])[1]
plt.hist(raw_double, bins=int(numpy.sqrt(1000)), density=True)
plt.hist(raw_poisson, bins=int(numpy.sqrt(1000)), density=True)
plt.show()