In [1]:
# Importing packages
import numpy as np
from pychange.costs import NormalMeanCost, NormalVarCost, NormalMeanVarCost, PoissonMeanVarCost, NonParametricCost
from pychange.segment import amoc_segment, pelt_segment, binary_segment
from pychange.r import RChangepoint
%load_ext line_profiler

In [2]:
# Creating synthetic data: a Normal series (test_series) and a Poisson series (p_test_series)
SEED = 0  # fixed seed so a fresh Restart & Run All regenerates identical series
np.random.seed(SEED)
size = 2000  # samples per segment
k = 10  # argument to NonParametricCost (also passed as nquantiles to the R wrapper)
min_len = 10  # minimum segment length (minseglen in the R wrapper)
max_cp = 40  # third argument to pelt_segment; presumably a cap on changepoints - confirm
pen = 100  # manual penalty value (pen_value in the R wrapper)
# NOTE(review): saved outputs further down report changepoints up to 95000, beyond the
# 20 * size = 40000 samples generated here, so they presumably come from a run with a
# different `size`; re-run all cells before comparing numbers.
# `[...] * 4` repeats the same five arrays, so each series is one 5-segment
# pattern (noise included) tiled four times: 20 * size samples in total.
test_series = np.hstack([np.random.normal(0, 1, (size,)),
                         np.random.normal(6, 1, (size,)),
                         np.random.normal(0, 2, (size,)),
                         np.random.normal(-4, 1, (size,)),
                         np.random.normal(3, 1, (size,)),
                         ] * 4)

p_test_series = np.hstack([np.random.poisson(2, (size,)),
                           np.random.poisson(6, (size,)),
                           np.random.poisson(10, (size,)),
                           np.random.poisson(4, (size,)),
                           np.random.poisson(3, (size,)),
                           ] * 4)

# AMOC

## Normal Mean

In [20]:
# Normal mean: pychange AMOC result (arguments: fitted cost, min_len, pen)
amoc_segment(NormalMeanCost().fit(test_series), min_len, pen)

10000

In [21]:
# Same case through the R wrapper, for comparison (test_stat is not passed here)
RChangepoint('mean', penalty='Manual', pen_value=pen, method='AMOC', minseglen=min_len).fit(test_series).predict()

array([10000.])

In [22]:
# Pychange timing: measures the cost fit and the AMOC segmentation together
%timeit amoc_segment(NormalMeanCost().fit(test_series), min_len, pen)

2.07 ms ± 20.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [23]:
# R timing: measures wrapper construction, fit and predict; test_stat="Normal" is passed explicitly
%timeit RChangepoint('mean', penalty='Manual', pen_value=pen, method='AMOC', test_stat="Normal", minseglen=min_len).fit(test_series).predict()

8.79 ms ± 227 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Normal Var

In [24]:
# Normal variance: pychange AMOC result
amoc_segment(NormalVarCost().fit(test_series), min_len, pen)

5000

In [25]:
# Same case through the R wrapper ('var'), for comparison
RChangepoint('var', penalty='Manual', pen_value=pen, method='AMOC', minseglen=min_len).fit(test_series).predict()

array([5000.])

In [26]:
# Pychange timing: Normal variance cost fit plus AMOC segmentation
%timeit amoc_segment(NormalVarCost().fit(test_series), min_len, pen)

5.95 ms ± 75.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [27]:
# R timing: 'var' with test_stat="Normal"; measures wrapper construction, fit and predict
%timeit RChangepoint('var', penalty='Manual', pen_value=pen, method='AMOC', test_stat="Normal", minseglen=min_len).fit(test_series).predict()

13.8 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Normal Mean+Var

In [28]:
# Normal mean+variance: pychange AMOC result
amoc_segment(NormalMeanVarCost().fit(test_series), min_len, pen)

95000

In [29]:
# Same case through the R wrapper ('meanvar'), for comparison
RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='AMOC', minseglen=min_len).fit(test_series).predict()

array([95000.])

In [30]:
# Pychange timing: Normal mean+variance cost fit plus AMOC segmentation
%timeit amoc_segment(NormalMeanVarCost().fit(test_series), min_len, pen)

3.81 ms ± 58.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [31]:
# R timing: 'meanvar' with test_stat="Normal"; measures wrapper construction, fit and predict
%timeit RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='AMOC', test_stat="Normal", minseglen=min_len).fit(test_series).predict()

18.2 ms ± 559 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Poisson Mean+Var

In [4]:
# Poisson mean+variance: pychange AMOC result on the Poisson series
amoc_segment(PoissonMeanVarCost().fit(p_test_series), min_len, pen)

5000

In [5]:
# Same case through the R wrapper ('meanvar' with test_stat="Poisson"), for comparison
RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='AMOC', test_stat="Poisson", minseglen=min_len).fit(p_test_series).predict()

array([5000.])

In [6]:
# Pychange timing: Poisson cost fit plus AMOC segmentation
%timeit amoc_segment(PoissonMeanVarCost().fit(p_test_series), min_len, pen)

2.72 ms ± 39.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
# R timing: 'meanvar' with test_stat="Poisson"; measures wrapper construction, fit and predict
%timeit RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='AMOC', test_stat="Poisson", minseglen=min_len).fit(p_test_series).predict()

68 ms ± 995 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Nonparametric

In [9]:
# Nonparametric cost (constructed with k): pychange AMOC result
amoc_segment(NonParametricCost(k).fit(test_series), min_len, pen)

10000

In [11]:
# Pychange timing: nonparametric cost fit plus AMOC segmentation (no R counterpart is run for this case)
%timeit amoc_segment(NonParametricCost(k).fit(test_series), min_len, pen)

119 ms ± 858 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


# PELT

## Normal Mean

In [14]:
# Normal mean: pychange PELT result (arguments: fitted cost, min_len, max_cp, pen, 1)
# NOTE(review): the meaning of the trailing 1 is not visible in this notebook - document it
pelt_segment(NormalMeanCost().fit(test_series), min_len, max_cp, pen, 1)

array([ 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000,
       50000, 55000, 60000, 65000, 70000, 75000, 80000, 85000, 90000,
       95000], dtype=int64)

In [15]:
# Same case through the R wrapper with method='PELT', for comparison
RChangepoint('mean', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

array([ 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000,
       50000, 55000, 60000, 65000, 70000, 75000, 80000, 85000, 90000,
       95000], dtype=int32)

In [17]:
# Pychange timing: Normal mean cost fit plus PELT segmentation
%timeit pelt_segment(NormalMeanCost().fit(test_series), min_len, max_cp, pen, 1)

3.85 s ± 23.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# R timing: 'mean' with method='PELT'; measures wrapper construction, fit and predict
%timeit RChangepoint('mean', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

1.1 s ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Normal Var

In [19]:
# Normal variance: pychange PELT result
pelt_segment(NormalVarCost().fit(test_series), min_len, max_cp, pen, 1)

array([ 4992, 10001, 14999, 20000, 25001, 29992, 35001, 39999, 45000,
       50001, 54992, 60001, 64999, 70000, 75001, 79992, 85001, 89999,
       95000], dtype=int64)

In [20]:
# Same case through the R wrapper ('var', method='PELT'), for comparison
# NOTE(review): the saved pychange and R outputs for this case disagree at several
# changepoints (e.g. 4992 vs 5000, 25001 vs 24991) - confirm whether both were run on the same data
RChangepoint('var', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

array([ 5000, 10001, 14999, 20000, 24991, 30000, 35001, 39999, 45000,
       49991, 55000, 60001, 64999, 70000, 74991, 80000, 85001, 89999,
       95000], dtype=int32)

In [21]:
# Pychange timing: Normal variance cost fit plus PELT segmentation
%timeit pelt_segment(NormalVarCost().fit(test_series), min_len, max_cp, pen, 1)

6.9 s ± 70.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
# R timing: 'var' with method='PELT'; measures wrapper construction, fit and predict
%timeit RChangepoint('var', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

9.72 s ± 95.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Normal Mean+Var

In [23]:
# Normal mean+variance: pychange PELT result
pelt_segment(NormalMeanVarCost().fit(test_series), min_len, max_cp, pen, 1)

array([ 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000,
       50000, 55000, 60000, 65000, 70000, 75000, 80000, 85000, 90000,
       95000], dtype=int64)

In [24]:
# Same case through the R wrapper ('meanvar', method='PELT'), for comparison
RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

array([ 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000,
       50000, 55000, 60000, 65000, 70000, 75000, 80000, 85000, 90000,
       95000], dtype=int32)

In [25]:
# Pychange timing: Normal mean+variance cost fit plus PELT segmentation
%timeit pelt_segment(NormalMeanVarCost().fit(test_series), min_len, max_cp, pen, 1)

5.33 s ± 23.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# R timing: 'meanvar' with method='PELT'; measures wrapper construction, fit and predict
%timeit RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len).fit(test_series).predict()

13.1 s ± 123 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Poisson Mean+Var

In [27]:
# Poisson mean+variance: pychange PELT result on the Poisson series
pelt_segment(PoissonMeanVarCost().fit(p_test_series), min_len, max_cp, pen, 1)

array([ 5000, 10000, 15002, 19996, 25006, 30000, 35000, 40002, 44996,
       50006, 55000, 60000, 65002, 69996, 75006, 80000, 85000, 90002,
       94996], dtype=int64)

In [28]:
# Same case through the R wrapper ('meanvar', method='PELT', test_stat="Poisson"), for comparison
RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len, test_stat="Poisson").fit(p_test_series).predict()

array([ 5000, 10000, 15002, 19996, 25006, 30000, 35000, 40002, 44996,
       50006, 55000, 60000, 65002, 69996, 75006, 80000, 85000, 90002,
       94996], dtype=int32)

In [29]:
# Pychange timing: Poisson cost fit plus PELT segmentation
%timeit pelt_segment(PoissonMeanVarCost().fit(p_test_series), min_len, max_cp, pen, 1)

5.15 s ± 47.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
# R timing: 'meanvar' with method='PELT' and test_stat="Poisson"; measures wrapper construction, fit and predict
%timeit RChangepoint('meanvar', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len, test_stat="Poisson").fit(p_test_series).predict()

16.1 s ± 130 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Nonparametric

In [3]:
# Nonparametric cost (constructed with k): pychange PELT result
pelt_segment(NonParametricCost(k).fit(test_series), min_len, max_cp, pen, 1)

array([ 2000,  4000,  5999,  8000,  9992, 12000, 14000, 15999, 18000,
       19992, 22000, 24000, 25999, 28000, 29992, 32000, 34000, 35999,
       38000], dtype=int64)

In [4]:
# Same case through the R wrapper ('np', method='PELT', nquantiles=k), for comparison
RChangepoint('np', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len, nquantiles=k).fit(test_series).predict()

array([ 2000,  4000,  5999,  8000,  9992, 12000, 14000, 15999, 18000,
       19992, 22000, 24000, 25999, 28000, 29992, 32000, 34000, 35999,
       38000], dtype=int32)

In [5]:
# Pychange timing: nonparametric cost fit plus PELT segmentation
%timeit pelt_segment(NonParametricCost(k).fit(test_series), min_len, max_cp, pen, 1)

3.01 s ± 3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# R timing: 'np' with method='PELT' and nquantiles=k
# NOTE(review): this cell has no execution count or saved output - it has not been run
%timeit RChangepoint('np', penalty='Manual', pen_value=pen, method='PELT', minseglen=min_len, nquantiles=k).fit(test_series).predict()

# Binary Segmentation