In [1]:
# Importing packages
import numpy as np
from numba import jit
import matplotlib.pyplot as plt
%load_ext cython

In [2]:
# Creating test series: 1500 draws with mean 0 / std 1 followed by 1000 draws
# with mean 1 / std 2, so the true change sits at index 1500.
# A fixed seed makes the series identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)
x = np.concatenate([rng.normal(0, 1, (1500,)), rng.normal(1, 2, (1000,))])
#x = rng.normal(0, 1, (1500,))  # alternative: a series drawn from one distribution only
print(x.shape)

(2500,)


### Pure Python

In [31]:
# Defining cost function
def mbic_meanvar(x, n):
    """Cost of one segment under a normal model with free mean and variance.

    Parameters
    ----------
    x : indexable
        Summary statistics of the segment; only ``x[0]`` and ``x[1]`` are read.
        In this notebook they are the segment's sum and sum of squares.
    n : int
        Number of observations in the segment.

    Returns
    -------
    float
        ``n * (log(2*pi) + log(max(var, 1e-8)) + 1) + log(n)`` where
        ``var = (x[1] - x[0]**2 / n) / n``.
    """
    total, total_sq = x[0], x[1]
    variance = (total_sq - ((total * total) / n)) / n
    # Floor the variance so a constant segment does not produce log(0).
    log_variance = np.log(np.fmax(variance, 1e-8))
    return n * (np.log(2 * np.pi) + log_variance + 1) + np.log(n)

In [36]:
# Defining segmentation function
def segment(x):
    """Find the single best changepoint in ``x`` (pure Python / NumPy baseline).

    Every split index ``i`` that leaves at least ``min_len`` (30) observations on
    each side is scored as ``mbic_meanvar`` of ``x[:i]`` plus ``mbic_meanvar`` of
    ``x[i:]``, computed from prefix sums. The best split is reported only if it
    is cheaper than the cost of the unsplit series.

    Parameters
    ----------
    x : numpy.ndarray
        1-D series to segment.

    Returns
    -------
    int or None
        Index of the first observation of the second segment, or ``None`` when
        no split beats the unsplit series or ``x`` is shorter than
        ``2 * min_len``.
    """
    min_len = 30
    n = x.shape[0]
    # No admissible split exists; the original code crashed in np.min([]) here.
    if n < 2 * min_len:
        return None

    # Row k holds statistics of x[:k]; row 0 stays all zeros.
    sumstats = np.zeros((n + 1, 3))
    sumstats[1:, 0] = x.cumsum()
    sumstats[1:, 1] = (x ** 2).cumsum()
    sumstats[1:, 2] = ((x - x.mean()) ** 2).cumsum()
    total = sumstats[-1, :]
    null = mbic_meanvar(total, n)

    costs = []
    for i in range(min_len, n - min_len + 1):
        left = mbic_meanvar(sumstats[i, :], i)
        # x[i:] contains n - i observations (previously n + 1 - i was passed).
        right = mbic_meanvar(total - sumstats[i, :], n - i)
        costs.append(left + right)

    # Finding best spot
    best_pos = np.argmin(costs)
    if costs[best_pos] < null:
        return best_pos + min_len
    return None

In [37]:
segment(x)

1506

In [38]:
%timeit segment(x)

41.4 ms ± 84 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Numba

In [39]:
@jit(nopython=True, fastmath=True)
def numba_mbic_meanvar(x, n):
    """Numba-compiled segment cost; same formula as the pure Python ``mbic_meanvar``.

    Reads ``x[0]`` and ``x[1]`` (segment sum and sum of squares in this
    notebook) and the segment length ``n``; returns
    ``n * (log(2*pi) + log(max(var, 1e-8)) + 1) + log(n)`` with
    ``var = (x[1] - x[0]**2 / n) / n``.
    """
    total = x[0]
    total_sq = x[1]
    variance = (total_sq - ((total * total) / n)) / n
    # Floor the variance so a constant segment does not produce log(0).
    log_variance = np.log(np.fmax(variance, 1e-8))
    return n * (np.log(2 * np.pi) + log_variance + 1) + np.log(n)

@jit(nopython=True, fastmath=True, parallel=True)
def numba_segment(x):
    """Find the single best changepoint in ``x`` (Numba version).

    Scores every split index ``i`` that leaves at least ``min_len`` (30)
    observations on each side with ``numba_mbic_meanvar`` and keeps the cheapest
    one, provided it is cheaper than the cost of the unsplit series.

    Parameters
    ----------
    x : numpy.ndarray
        1-D float series to segment.

    Returns
    -------
    int
        Index of the first observation of the second segment, or ``0`` when no
        split beats the unsplit series or ``x`` is shorter than ``2 * min_len``.
        ``0`` is never a valid split because splits start at ``min_len``.
    """
    best_ind = 0
    min_len = 30
    n = x.shape[0]
    # No admissible split exists; the original code failed in costs.min() here.
    if n < 2 * min_len:
        return best_ind

    # Row k holds statistics of x[:k]; row 0 stays all zeros.
    sumstats = np.zeros((n + 1, 3))
    sumstats[1:, 0] = x.cumsum()
    sumstats[1:, 1] = (x ** 2).cumsum()
    sumstats[1:, 2] = ((x - x.mean()) ** 2).cumsum()
    total = sumstats[n, :]

    # Start from the unsplit cost so a split is accepted only if strictly cheaper;
    # the strict comparison keeps the first minimum, like np.argmin.
    best_cost = numba_mbic_meanvar(total, n)
    for i in range(min_len, n - min_len + 1):
        # x[i:] contains n - i observations (previously n + 1 - i was passed).
        cost = numba_mbic_meanvar(sumstats[i, :], i) + numba_mbic_meanvar(total - sumstats[i, :], n - i)
        if cost < best_cost:
            best_cost = cost
            best_ind = i
    return best_ind

In [40]:
numba_segment(x)

1506

In [41]:
%timeit numba_segment(x)

304 µs ± 400 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Cython

In [10]:
%%cython -a

import numpy as np 
cimport numpy as np 
cimport cython

DTYPE = np.float64
ITYPE = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t ITYPE_t

from libc.math cimport sqrt, log, M_PI, fmax, isnan

@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
cdef inline DTYPE_t cython_mbic_meanvar(DTYPE_t total, DTYPE_t total_sq, ITYPE_t n):
    # Cost of one segment from scalar summary statistics:
    #   n * (log(2*pi) + log(max(var, 1e-8)) + 1) + log(n)
    #   with var = (total_sq - total**2 / n) / n.
    # Takes scalars so that cython_segment can call it straight from memoryview
    # elements; the previous ndarray signature did not match those call sites.
    cdef DTYPE_t size = <DTYPE_t> n
    return size * (log(2 * M_PI) + log(fmax((total_sq - ((total * total) / size)) / size, 1e-8)) + 1) + log(size)

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ITYPE_t cython_segment(np.ndarray[DTYPE_t, ndim=1] x):
    """Find the single best changepoint in ``x`` (Cython version).

    Scores every split index ``i`` in ``[min_len, n - min_len]`` (min_len = 30)
    as the cost of ``x[:i]`` plus the cost of ``x[i:]`` and keeps the cheapest
    one, provided it is cheaper than the cost of the unsplit series.

    Returns the index of the first observation of the second segment, or ``0``
    when no split beats the unsplit series or ``x`` is shorter than
    ``2 * min_len``.
    """
    cdef ITYPE_t min_len = 30
    cdef ITYPE_t n = x.shape[0]
    cdef ITYPE_t best_ind = 0
    cdef ITYPE_t i
    cdef DTYPE_t cost, best_cost
    cdef DTYPE_t [:, :] sumstats

    # No admissible split exists for such a short series.
    if n < 2 * min_len:
        return best_ind

    # Row k holds statistics of x[:k]; the third column is unused by this cost
    # but kept so the setup work matches the Python and Numba versions.
    sumstats = np.stack([np.append(0, x.cumsum()),
                         np.append(0, (x ** 2).cumsum()),
                         np.append(0, ((x - x.mean()) ** 2).cumsum())],
                        axis=-1)

    # Start from the unsplit cost so a split is accepted only if strictly cheaper;
    # the strict comparison keeps the first minimum, like np.argmin.
    best_cost = cython_mbic_meanvar(sumstats[n, 0], sumstats[n, 1], n)

    # Same split range as the Python and Numba versions: min_len .. n - min_len inclusive.
    for i in range(min_len, n - min_len + 1):
        # x[i:] contains n - i observations.
        cost = (cython_mbic_meanvar(sumstats[i, 0], sumstats[i, 1], i)
                + cython_mbic_meanvar(sumstats[n, 0] - sumstats[i, 0],
                                      sumstats[n, 1] - sumstats[i, 1],
                                      n - i))
        if cost < best_cost:
            best_cost = cost
            best_ind = i
    return best_ind

In [11]:
cython_segment(x)

1506

In [12]:
%timeit cython_segment(x)

448 µs ± 774 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


### Benchmarking the cost function on its own

In [13]:
# Prepping data
# Column 0: running sum, column 1: running sum of squares, column 2: running sum
# of squared deviations from the overall mean. A leading zero makes row k
# describe x[:k].
sumstats = np.column_stack((
    np.append(0, x.cumsum()),
    np.append(0, (x ** 2).cumsum()),
    np.append(0, ((x - x.mean()) ** 2).cumsum()),
))
index = 20
# Statistics of the first `index` observations.
row = sumstats[index] - sumstats[0]

In [14]:
# Pure python
%timeit mbic_meanvar(*row, index)

7.83 µs ± 27.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [15]:
# Numba
%timeit numba_mbic_meanvar(row, index)

362 ns ± 4.76 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [44]:
%%cython -a

import numpy as np 
cimport numpy as np 
cimport cython

DTYPE = np.float64
ITYPE = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t ITYPE_t

from libc.math cimport log, M_PI, fmax

@cython.wraparound(False)
@cython.binding(False)
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.overflowcheck(False)
cdef DTYPE_t _cython_mbic_meanvar(np.ndarray[DTYPE_t, ndim=1] x, ITYPE_t n):
    # Returns a double (DTYPE_t): a C `float` return type would round the
    # float64 result to single precision and no longer match NumPy / Numba.
    return(n * (log(2 * M_PI) + log(fmax((x[1] - ((x[0] * x[0]) / n)) / n, 1e-8)) + 1) + log(n))

def cython_mbic_meanvar(np.ndarray[DTYPE_t, ndim=1] x, int n):
    """Python-callable wrapper around the C-level ``_cython_mbic_meanvar``.

    ``x`` is a 1-D float64 array of which ``x[0]`` and ``x[1]`` are read;
    ``n`` is the segment length. Returns
    ``n * (log(2*pi) + log(max(var, 1e-8)) + 1) + log(n)`` with
    ``var = (x[1] - x[0]**2 / n) / n``.
    """
    return _cython_mbic_meanvar(x, n)

In [45]:
# Cython
%timeit cython_mbic_meanvar(row, index)
# Looks like this is slightly faster than numba

338 ns ± 3.99 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Cost function: mean change only

In [46]:
def mean_norm(x, n):
    """Segment cost for a change in mean only.

    Reads ``x[0]`` and ``x[1]`` (segment sum and sum of squares in this
    notebook) and returns ``x[1] - x[0]**2 / n + log(n)``, i.e. the sum of
    squares about the segment mean plus a ``log(n)`` term.
    """
    total, total_sq = x[0], x[1]
    residual_ss = total_sq - (total * total) / n
    return residual_ss + np.log(n)

numba_mean_norm = jit(nopython=True, fastmath=True)(mean_norm)
numba_mean_norm(row, index)

24.407586749520735

In [56]:
%%cython -a

import numpy as np 
cimport numpy as np 
cimport cython

DTYPE = np.float64
ITYPE = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t ITYPE_t

from libc.math cimport log, M_PI, fmax

@cython.wraparound(False)
@cython.binding(False)
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.overflowcheck(False)
cdef DTYPE_t _cython_mean_norm(np.ndarray[DTYPE_t, ndim=1] x, int n):
    # Returns a double (DTYPE_t): a C `float` return type would round the
    # float64 result to single precision and no longer match NumPy / Numba.
    return(x[1] - (x[0] * x[0]) / n + log(n))

def cython_mean_norm(np.ndarray[DTYPE_t, ndim=1] x, int n):
    """Python-callable wrapper around the C-level ``_cython_mean_norm``.

    ``x`` is a 1-D float64 array of which ``x[0]`` and ``x[1]`` are read;
    ``n`` is the segment length. Returns ``x[1] - x[0]**2 / n + log(n)``.
    """
    return _cython_mean_norm(x, n)

In [53]:
# Pure python
%timeit mean_norm(row, index)

2.05 µs ± 6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [54]:
# Numba
%timeit numba_mean_norm(row, index)

391 ns ± 2.29 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [57]:
# Cython
%timeit cython_mean_norm(row, index)
# This one is quite a bit faster than numba

316 ns ± 7.08 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Cost function: variance change only

In [59]:
def var_norm(x, n):
    """Segment cost for a change in variance only.

    Reads ``x[2]`` (in this notebook: the segment's sum of squared deviations
    from the overall series mean) and returns
    ``n * (log(2*pi) + log(max(x[2], 1e-8) / n) + 1) + log(n)``.
    """
    # The floor is applied to the sum of squares before dividing by n.
    floored_ss = np.fmax(x[2], 1e-8)
    log_variance = np.log(floored_ss / n)
    return n * (np.log(2 * np.pi) + log_variance + 1) + np.log(n)

numba_var_norm = jit(nopython=True, fastmath=True)(var_norm)
numba_var_norm(row, index)

66.47314179576694

In [61]:
%%cython -a

import numpy as np 
cimport numpy as np 
cimport cython

DTYPE = np.float64
ITYPE = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t ITYPE_t

from libc.math cimport log, M_PI, fmax

@cython.wraparound(False)
@cython.binding(False)
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.nonecheck(False)
@cython.overflowcheck(False)
cdef DTYPE_t _cython_var_norm(np.ndarray[DTYPE_t, ndim=1] x, int n):
    # Returns a double (DTYPE_t): a C `float` return type would round the
    # float64 result to single precision and no longer match NumPy / Numba.
    return(n * (log(2 * M_PI) + log(fmax(x[2], 1e-8) / n) + 1) + log(n))

def cython_var_norm(np.ndarray[DTYPE_t, ndim=1] x, int n):
    """Python-callable wrapper around the C-level ``_cython_var_norm``.

    ``x`` is a 1-D float64 array of which ``x[2]`` is read; ``n`` is the
    segment length. Returns
    ``n * (log(2*pi) + log(max(x[2], 1e-8) / n) + 1) + log(n)``.
    """
    return _cython_var_norm(x, n)

In [62]:
# Pure python
%timeit var_norm(row, index)

6.19 µs ± 10.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [63]:
# Numba
%timeit numba_var_norm(row, index)

359 ns ± 4.05 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [64]:
# Cython
%timeit cython_var_norm(row, index)
# Numba manages to close the gap a bit here

352 ns ± 5.77 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [2]:
from pychange.costs import normal_mean_cost
from pychange.segment import amoc_segment

In [3]:
# Creating test series: 1500 draws with mean 0 / std 1 followed by 1000 draws
# with mean 1 / std 2, so the true change sits at index 1500.
# A fixed seed makes the series identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)
x = np.concatenate([rng.normal(0, 1, (1500,)), rng.normal(1, 2, (1000,))])
#x = rng.normal(0, 1, (1500,))  # alternative: a series drawn from one distribution only
print(x.shape)

(2500,)


In [4]:
%timeit amoc_segment(x, 30, normal_mean_cost)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
No implementation of function Function(<function stack at 0x000001D05F3C44C0>) found for signature:
 
 >>> stack(list(array(float64, 1d, C))<iv=None>, axis=Literal[int](-1))
 
There are 2 candidate implementations:
  - Of which 2 did not match due to:
  Overload in function 'stack': File: numba\core\typing\npydecl.py: Line 818.
    With argument(s): '(list(array(float64, 1d, C))<iv=None>, axis=int64)':
   Rejected as the implementation raised a specific error:
     TypeError: np.stack(): expecting a non-empty tuple of arrays, got list(array(float64, 1d, C))<iv=None>
  raised from C:\Users\gursk\anaconda3\envs\changepoint\lib\site-packages\numba\core\typing\npydecl.py:772

During: resolving callee type: Function(<function stack at 0x000001D05F3C44C0>)
During: typing of call at C:\Users\gursk\Desktop\Thesis Project\pychange\segment.py (7)


File "pychange\segment.py", line 7:
def create_summary_stats(x):
    x = np.stack([np.append(0, x.cumsum()),
    ^


In [7]:
numba_normal_mean_cost = jit(nopython=True, fastmath=True)(normal_mean_cost)

In [8]:
amoc_segment(x, 30, numba_normal_mean_cost)

1494

In [9]:
%timeit amoc_segment(x, 30, numba_normal_mean_cost)

8.07 ms ± 28.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
numba_amoc_segment = jit(nopython=True, fastmath=True)(amoc_segment)

In [11]:
numba_amoc_segment(x, 30, numba_normal_mean_cost)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
No implementation of function Function(<function stack at 0x000002172D3534C0>) found for signature:
 
 >>> stack(list(array(float64, 1d, C))<iv=None>, axis=Literal[int](-1))
 
There are 2 candidate implementations:
  - Of which 2 did not match due to:
  Overload in function 'stack': File: numba\core\typing\npydecl.py: Line 818.
    With argument(s): '(list(array(float64, 1d, C))<iv=None>, axis=int64)':
   Rejected as the implementation raised a specific error:
     TypeError: np.stack(): expecting a non-empty tuple of arrays, got list(array(float64, 1d, C))<iv=None>
  raised from C:\Users\gursk\anaconda3\envs\changepoint\lib\site-packages\numba\core\typing\npydecl.py:772

During: resolving callee type: Function(<function stack at 0x000002172D3534C0>)
During: typing of call at C:\Users\gursk\Desktop\Thesis Project\pychange\segment.py (7)


File "pychange\segment.py", line 7:
    def create_summary_stats(x):
        x = np.stack([np.append(0, x.cumsum()),
        ^
