In [1]:
from __future__ import division
import numpy as np
from scipy import stats

In [25]:
# Leftover environment check: shells out to print the kernel's working directory.
!pwd

/Users/divay/Documents/work/DATANALYTICS/personalization-user-artist-vectors/notebooks


In [4]:
from functools import partial

In [1]:
def online_changepoint_detection(data, hazard_func, observation_likelihood):
    """Compute run-length probabilities for ``data`` one observation at a time.

    Parameters
    ----------
    data : sequence
        Observations, consumed in order.
    hazard_func : callable
        Called at step ``t`` with the integer array ``[0, ..., t]`` of run
        lengths; must return the changepoint probability for each of them.
    observation_likelihood : object
        Must expose ``pdf(x)``, whose result is multiplied element-wise with
        the ``t + 1`` run-length probabilities at step ``t``, and
        ``update_theta(x)``, which is called once per observation after
        ``pdf``. The object is mutated by this function.

    Returns
    -------
    R : numpy.ndarray, shape ``(len(data) + 1, len(data) + 1)``
        ``R[r, t]`` is the normalized probability of run length ``r`` after
        ``t`` observations; column 0 puts all mass on ``r = 0``.
    maxes : numpy.ndarray of float, length ``len(data) + 1``
        ``maxes[t]`` is the index of the largest entry of ``R[:, t]``.
        Every column is covered, including the final one.
    """
    n = len(data)

    R = np.zeros((n + 1, n + 1))
    R[0, 0] = 1

    for t, x in enumerate(data):
        # Evaluate the predictive distribution for the new datum under each of
        # the parameters.  This is the standard thing from Bayesian inference.
        predprobs = observation_likelihood.pdf(x)

        # Evaluate the hazard function for this interval
        H = hazard_func(np.arange(t + 1))

        # Evaluate the growth probabilities - shift the probabilities down and to
        # the right, scaled by the hazard function and the predictive
        # probabilities.
        R[1:t+2, t+1] = R[0:t+1, t] * predprobs * (1 - H)

        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t+1] = np.sum(R[0:t+1, t] * predprobs * H)

        # Renormalize the run length probabilities for improved numerical
        # stability.
        R[:, t+1] = R[:, t+1] / np.sum(R[:, t+1])

        # Update the parameter sets for each possible run length.
        observation_likelihood.update_theta(x)

    # Take the per-column argmax once the whole matrix is filled. Doing it
    # inside the loop on column ``t`` left the last column unreported, so the
    # final entry of ``maxes`` was always 0. Cast to float to keep the dtype
    # of the previously returned array.
    maxes = R.argmax(axis=0).astype(float)
    return R, maxes

In [1]:
def constant_hazard(lam, r):
    """Return a hazard of ``1 / lam`` for every run length in ``r``.

    Parameters
    ----------
    lam : number
        The returned probability is ``1 / lam`` everywhere.
    r : array-like
        Run lengths. Only the shape is used, so an ndarray, list, tuple or
        scalar is accepted.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``r`` filled with ``1 / lam``.
    """
    # np.shape works on any array-like, whereas ``r.shape`` needs an ndarray.
    return 1/lam * np.ones(np.shape(r))

In [1]:
class StudentT:
    """Student-t predictive density with one parameter set per array slot.

    ``alpha``, ``beta``, ``kappa`` and ``mu`` are 1-D arrays that start with
    the single prior value and grow by one element on every
    ``update_theta`` call; the ``*0`` attributes keep the prior values.
    """

    def __init__(self, alpha, beta, kappa, mu):
        """Store each prior value as a one-element array, twice over."""
        alpha_prior = np.array([alpha])
        beta_prior = np.array([beta])
        kappa_prior = np.array([kappa])
        mu_prior = np.array([mu])

        # The current parameters start out as the very same arrays as the priors.
        self.alpha0 = alpha_prior
        self.alpha = alpha_prior
        self.beta0 = beta_prior
        self.beta = beta_prior
        self.kappa0 = kappa_prior
        self.kappa = kappa_prior
        self.mu0 = mu_prior
        self.mu = mu_prior

    def pdf(self, data):
        """Evaluate the Student-t density of ``data`` for every parameter set."""
        squared_scale = self.beta * (self.kappa+1) / (self.alpha * self.kappa)
        return stats.t.pdf(
            x=data,
            df=2*self.alpha,
            loc=self.mu,
            scale=np.sqrt(squared_scale),
        )

    def update_theta(self, data):
        """Fold ``data`` into every parameter set and prepend the prior set."""
        # Compute every updated array from the *current* parameters before
        # any attribute is overwritten.
        next_mu = (self.kappa * self.mu + data) / (self.kappa + 1)
        next_kappa = self.kappa + 1.
        next_alpha = self.alpha + 0.5
        next_beta = self.beta + (self.kappa * (data - self.mu)**2) / (
            2. * (self.kappa + 1.))

        # Slot 0 always holds the prior; the updated sets follow it.
        self.mu = np.concatenate((self.mu0, next_mu))
        self.kappa = np.concatenate((self.kappa0, next_kappa))
        self.alpha = np.concatenate((self.alpha0, next_alpha))
        self.beta = np.concatenate((self.beta0, next_beta))

In [13]:
# Tiny hand-made series used to inspect the detector's output by eye.
data = [1, 3, 0, 0]

In [14]:
# a: run-length probability matrix, b: per-column argmax of that matrix.
a, b = online_changepoint_detection(
    data,
    hazard_func=partial(constant_hazard, 250),
    observation_likelihood=StudentT(10, .03, 1, 0),
)

In [15]:
# Dump the full run-length probability matrix (first return value of the detector).
print(a)

[[1.00000000e+00 4.00000000e-03 4.00000000e-03 4.00000000e-03
  4.00000000e-03]
 [0.00000000e+00 9.96000000e-01 2.85265914e-12 3.24324646e-01
  9.75320585e-03]
 [0.00000000e+00 0.00000000e+00 9.96000000e-01 1.37562376e-12
  9.36245130e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 6.71675354e-01
  1.12177227e-13]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  5.00016638e-02]]


In [21]:
# Manual check: density of data[1] under a fresh prior, times 249/250
# (the 1 - 1/250 "no changepoint" weight for constant_hazard with lam=250).
StudentT(10, .03, 1, 0).pdf(data[1])*(249/250)

array([9.03908841e-20])

In [22]:
# Manual check: same density, times 1/250 (the hazard value for lam=250).
StudentT(10, .03, 1, 0).pdf(data[1])*(1/250)

array([3.63015599e-22])

In [24]:
# Normalize the two hand-copied products: changepoint term / (growth term + changepoint term).
3.63015599e-22/(9.03908841e-20+3.63015599e-22)

0.004000000002247837