## Supervised Approach

Another approach is to train a weak classifier to distinguish the left
half of a sliding window from the right half as the window moves along
the trajectory.

### Import All the Things

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ruptures as rpt  # change point detection library
import scipy as sp
from scipy.ndimage import median_filter
import sklearn as sk

import dupin as du

FILENAME = "lj-data.h5"

### Import Trajectory Data

We also normalize all features to a range of $[0, 1]$.

In [None]:
# Load the "pruned" table from the HDF5 file into a DataFrame.
df = pd.read_hdf(FILENAME, key="pruned")
# Last expression of the cell, so the notebook displays the first rows.
df.head()

### Compute Errors

We start by creating a weak classifier and a `Window` object.
The `Window` object has a `compute` method that will train
$n - \text{window_size}$ classifiers to discriminate between
the left and right half of the window. The errors are recorded
and returned in a list.

In [None]:
# Import the submodule explicitly: `import sklearn as sk` on its own does not
# guarantee that `sk.tree` has been loaded as an attribute of the package.
from sklearn.tree import DecisionTreeClassifier

# Weak classifier: a depth-1 decision tree (a stump).
decision_tree = DecisionTreeClassifier(max_depth=1)

window = du.preprocessing.supervised.Window(
    classifier=decision_tree,
    window_size=20,
    test_size=0.5,
    n_classifiers=100,
    combine_errors="mean",
)

data = df.to_numpy()
# Error signals are keyed by the filter applied to them; "" is the
# unfiltered classifier error.
errors = {"": window.compute(data)}

### Smooth Error Signal

The raw errors for a classifier (see plot below) can be a bit jagged.
Different signal filters can be used to smooth out some of the noise in
the classification error.

Here we use a moving average approach and a median filter from SciPy.

In [None]:
# Add two filtered copies of the raw error signal (key ""), both using a
# width of 7: a moving average and a median filter.
errors.update(
    smoothed=du.preprocessing.signal.moving_average(y=errors[""], span=7),
    median=median_filter(errors[""], size=7, mode="reflect"),
)

### Plot error signals

In [None]:
fig, ax = plt.subplots(1, 3, figsize=(14, 5))
# The x-axis is shifted because the first window is centered at half its
# length rather than at frame 0.
# NOTE(review): the shift used here is 7, while the window above is created
# with window_size=20 (half would be 10) -- confirm 7 is intended.
frame = np.arange(7, 7 + len(window.errors))
# (key into `errors`, panel title) for each of the three side-by-side panels.
panels = (
    ("", "Classifier Error"),
    ("smoothed", "Smoothed Classifier Error"),
    ("median", "Median Classifier Error"),
)
for axis, (key, heading) in zip(ax, panels):
    axis.plot(frame, errors[key])
    axis.title.set_text(heading)
    axis.title.set_size(15)
fig.tight_layout();

### Detect Events

Now we use the same detector as before with a different cost
function to more appropriately find the expected signal of change.

In [None]:
# `max_change_points` argument passed to the detector built for each signal.
# NOTE(review): "median" uses 8 while the other two use 6 -- confirm this
# difference is intentional.
max_change_points = {"": 6, "smoothed": 6, "median": 8}
detectors = {
    name: du.detect.offline.SweepDetector(
        rpt.Dynp(model="l1"), max_change_points=limit
    )
    for name, limit in max_change_points.items()
}

# Fit each detector on its own error signal and report what it found.
for name, sweep in detectors.items():
    sweep.fit(errors[name])
    # The unfiltered signal (empty name) gets no suffix in the message.
    suffix = f" {name}" if name else name
    print(f"Optimal change points{suffix}: {sweep.opt_change_points_}")

### Plot errors with the change points and costs

In [None]:
def plot_error(ax, errors, filtering, change_points, frames=None):
    """Plot an error signal and mark change points with vertical lines.

    Parameters
    ----------
    ax
        Axes-like object providing ``plot``, ``set_title`` and ``vlines``.
    errors
        Sequence of error values to plot.
    filtering
        Name of the filter applied to ``errors``. It is prepended to the
        title unless it is the empty string.
    change_points
        x positions at which vertical lines are drawn.
    frames
        x values to plot ``errors`` against. When ``None`` (the default),
        the module-level ``frame`` array is used.
    """
    x_values = frame if frames is None else frames
    ax.plot(x_values, errors)
    title = "Classifier Error"
    if filtering != "":
        title = " ".join((filtering, title))
    # Use the computed title so the filter name shows up (the literal
    # "Classifier Error" was previously passed here, discarding it).
    ax.set_title(title, size=15)
    # NOTE(review): change_points are drawn at the x values given. If they
    # are indices into `errors` rather than frame numbers, they will be
    # offset from the curve by the shift applied to the x values -- confirm.
    ax.vlines(change_points, min(errors), max(errors), "k")
    

def plot_costs(ax, detector, filtering):
    """Plot a detector's costs and mark its optimal number of change points.

    Parameters
    ----------
    ax
        Axes-like object providing ``plot``, ``set_title`` and ``vlines``.
    detector
        Fitted detector exposing ``costs_`` and ``opt_n_change_points_``.
    filtering
        Name of the filter the detector was fitted on. It is prepended to
        the title unless it is the empty string.
    """
    cost_values = detector.costs_
    heading = "Costs" if filtering == "" else " ".join([filtering, "Costs"])
    ax.plot(cost_values)
    ax.set_title(heading, size=15)
    # Vertical line spanning the full cost range at the selected count.
    ax.vlines(
        detector.opt_n_change_points_, min(cost_values), max(cost_values), 'k'
    )
    

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# One column per detector: the error signal with its change points on the
# top row, the detector's costs on the bottom row.
for column, name in zip(axes.T, detectors):
    top, bottom = column
    sweep = detectors[name]
    label = name.title()
    plot_error(top, errors[name], label, sweep.opt_change_points_)
    plot_costs(bottom, sweep, label)

fig.tight_layout();