In [36]:
# Importing packages
import numpy as np
from numba import njit
from pychange.segment import create_summary_stats
from pychange.costs import normal_mean_var_cost
from pychange.numba_costs import normal_mean_var_cost as fast_cost

In [37]:
# Creating synthetic data
x = np.concatenate([np.random.normal(0, 1, (10000,)),
                    np.random.normal(20, 2, (10000,)),
                    np.random.normal(-10, 1, (10000,)),
                   np.random.normal(10, 1, (10000,))])

In [39]:
@njit(fastmath=True, parallel=True)
def vector_mean_var_cost(x, n):
    return n * (np.log(2 * np.pi) + np.log(np.fmax((x[:, 1] - ((x[:, 0] * x[:, 0]) / n))/ n, 1e-8) + 1))

In [40]:
vector_mean_var_cost(sum_stats, np.arange(1, sum_stats.shape[0] + 1))

array([1.83787708e+00, 5.19816399e+00, 7.79411189e+00, ...,
       2.67500957e+05, 2.67506836e+05, 2.67512740e+05])

In [41]:
def binary_segmentation(x, min_len, max_cp, penalty, cost_fn):
    """Runs binary segmentation on time series"""

    # Setting up summary statistics and objects
    n = x.shape[0]
    sum_stats = np.stack((np.append(0, x.cumsum()),
                          np.append(0, (x ** 2).cumsum()),
                          np.append(0, ((x - x.mean()) ** 2).cumsum())),
                         axis=-1)
    is_candidate = np.arange(min_len, n - min_len)
    cps = np.zeros(shape=(n,))
    costs = np.full(shape=n, fill_value=0.0)
    cps[-1] = 1
    cps[0] = 1
    costs[-1] = cost_fn(sum_stats[-1:, :], n)

    # Iterating through changepoints until convergence
    while True:

        # Single Loop Iteration
        _cps = np.flatnonzero(cps)
        best_cand, best_cost, best_next_cost, best_next = 0, 0, 0, 0
        best_total_cost = costs.sum()

        # Looping over candidates
        for c1, c2 in np.stack((_cps[:-1], _cps[1:]), axis=-1):
            _cands = is_candidate[(is_candidate > c1) & (is_candidate < c2)]
            _costs = np.empty(shape=(_cands.shape[0], 3), dtype=np.float64)
            _other_costs = costs[: (c1 + 1)].sum() + costs[(c2 + 1):].sum()
            _costs[:, 0] = cost_fn(sum_stats[_cands, :] - sum_stats[c1, :], _cands - c1)
            _costs[:, 1] = cost_fn(sum_stats[c2, :] - sum_stats[_cands, :], c2 - _cands)
            _costs[:, 2] = _costs[:, 0] + _costs[:, 1] + _other_costs + penalty
            _best_cand = np.argmin(_costs[:, 2])
            if _costs[_best_cand, 2] < best_total_cost:
                best_cand = _cands[_best_cand]
                best_cost = _costs[_best_cand, 0]
                best_next_cost = _costs[_best_cand, 1]
                best_total_cost = _costs[_best_cand, 2]
                best_next = c2

        if best_cand == 0:
            break
        else:
            cps[best_cand] = True
            costs[best_cand] = best_cost
            costs[best_next] = best_next_cost
            is_candidate[(best_cand - min_len): (best_cand + min_len)] = False
            if np.flatnonzero(cps).shape[0] > max_cp + 2:
                break
        
    return np.flatnonzero(cps)[1:-1]

In [43]:
jit_binseg = njit(fastmath=True, parallel=True)(binary_segmentation)
_ = jit_binseg(x, 30, 10, 100, vector_mean_var_cost)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mNo implementation of function Function(<built-in function setitem>) found for signature:
 
 >>> setitem(array(float64, 1d, C), Literal[int](-1), array(float64, 1d, C))
 
There are 16 candidate implementations:
[1m   - Of which 16 did not match due to:
   Overload of function 'setitem': File: <numerous>: Line N/A.
     With argument(s): '(array(float64, 1d, C), int64, array(float64, 1d, C))':[0m
[1m    No match.[0m
[0m
[0m[1mDuring: typing of staticsetitem at <ipython-input-41-60539e5b1c56> (15)[0m
[1m
File "<ipython-input-41-60539e5b1c56>", line 15:[0m
[1mdef binary_segmentation(x, min_len, max_cp, penalty, cost_fn):
    <source elided>
    cps[0] = 1
[1m    costs[-1] = cost_fn(sum_stats[-1:, :], n)
[0m    [1m^[0m[0m


In [7]:
# Creating summary stats
sum_stats = create_summary_stats(x)

In [None]:
%timeit normal_mean_var_cost(sum_stats[-1, :], sum_stats.shape[0])

In [None]:
%timeit fast_cost(sum_stats[-1, :], sum_stats.shape[0])

In [None]:
%timeit jit_cost(sum_stats[-1, :], sum_stats.shape[0])

In [45]:
%timeit binary_segmentation(x, 30, 10, 100, vector_mean_var_cost)

ValueError: attempt to get argmin of an empty sequence

In [35]:
%timeit binary_segmentation(x, 30, 10, 100, fast_cost)

217 ms ± 6.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit binary_segmentation(x, 30, 10, 100, jit_cost)

In [None]:
%timeit binary_segmentation(x, 30, 10, 100, fast_cost)

In [None]:
%timeit jit_binseg(x, 30, 10, 100, jit_cost)

In [47]:
%prun binary_segmentation(x, 30, 10, 100, vector_mean_var_cost)

 