The goal of this exercise is to automatically split a data series into two parts at the point where its trend changes, assuming the data are counts drawn from Poisson distributions with different lambdas. This exercise is not original; it follows the concepts of the book Probabilistic-Programming-and-Bayesian-Methods-for-Hackers and the PyMC tutorial https://pymc-devs.github.io/pymc/tutorial.html#

In [None]:
import numpy as np
import pymc3 as pm

from matplotlib import pyplot as plt
from IPython.core.pylabtools import figsize

%matplotlib inline

# Accent colour for the notebook's plots.
nice_colour = "#bd345d"
# Switch matplotlib to its bundled "bmh" style sheet for all later figures.
plt.style.use("bmh")

We will generate data from Poisson probability distributions with two different lambda parameters.

In [None]:
# Synthetic data settings: a Poisson rate and a sample count for each of the
# two regimes (before and after the change in trend).
lam_1, count_1 = 1.50, 15
lam_2, count_2 = 5.00, 15

# Draw the first regime, then the second, so the random stream is consumed
# in a fixed order.
arr_1 = np.random.poisson(lam=lam_1, size=count_1)
arr_2 = np.random.poisson(lam=lam_2, size=count_2)

# Full series: regime 1 followed by regime 2.
arr = np.hstack([arr_1, arr_2])
n_arr = arr.size  # total number of observations
av = arr.mean()  # sample mean of the whole series

In [None]:
# Plot the generated counts as a bar chart, one bar per observation index.
figsize(12.5, 3.5)  # (width, height) passed to IPython's figsize helper
plt.bar(np.arange(n_arr), arr, color=nice_colour)
# The trailing semicolon keeps the notebook from echoing the returned object.
plt.title("Our Generated Dataset");

We will assume that the lambdas are drawn from an exponential distribution.

In [None]:
with pm.Model() as model:
    # Priors for the Poisson rates before and after the change point.
    # pm.Exponential takes a *rate*; an Exponential(rate) prior has mean
    # 1 / rate, so the rate is 1 / av to centre the prior on the sample mean
    # of the counts (passing av itself would centre it on 1 / av instead).
    alpha = 1.0 / av
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    # tau is the last index that still follows lambda_1 (see the `>=` below).
    # Its largest meaningful value is n_arr - 1; n_arr would produce exactly
    # the same assignment and so be counted twice by the uniform prior.
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_arr - 1)

    idx = np.arange(n_arr)  # observation indices 0 .. n_arr - 1
    # Per-observation rate: lambda_1 up to and including tau, lambda_2 after.
    lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)