In [1]:
import sys

# Local checkout of the project. It is appended to the module search path
# (only once, even if this cell is re-run) -- presumably so that the `cpd`
# package imported by the next cell can be found.
HOME_DIR = '/Users/abhisheksharma-mbpro/Documents/cpd-moht'
if sys.path.count(HOME_DIR) == 0:
  sys.path.append(HOME_DIR)

In [12]:
import json

import matplotlib.pyplot as plt
import numpy as np

import cpd.metrics as cpd_metrics
import cpd.utils as cpd_utils


In [None]:
import os

# Directory that is searched (recursively) for dataset JSON files.
DATASETS_ROOT = '/Users/abhisheksharma-mbpro/Documents/cpd-moht/TCPD/datasets/'

# Maps a directory's base name to the path of one `.json` file found directly
# inside that directory. Directories without any `.json` file are skipped.
DATASET_PATHS = {}
for root, dirs, files in os.walk(DATASETS_ROOT):
  # Sort so that the file picked for a directory holding several JSON files
  # does not depend on the (arbitrary) order of the directory listing.
  json_files = sorted(name for name in files if name.endswith('.json'))
  if json_files:
    DATASET_PATHS[os.path.basename(root)] = os.path.join(root, json_files[0])

print(DATASET_PATHS.keys())

In [14]:
# JSON file that a later cell loads as the changepoint annotations, keyed by
# dataset name.
ANNOTATIONS_PATH = '/Users/abhisheksharma-mbpro/Documents/cpd-moht/TCPD/annotations.json'  # noqa: E501


In [None]:
# Dataset analysed by the rest of the notebook; must be a key of
# DATASET_PATHS.
DATASET_NAME = 'occupancy'

# NOTE(review): `load_dataset` is a project helper; `mat` is presumably a 2-D
# (time steps x dimensions) array, as later cells read `mat.shape[1]` --
# confirm against cpd.utils.
dataset_path = DATASET_PATHS[DATASET_NAME]
data, mat = cpd_utils.load_dataset(dataset_path)
print(mat.shape)

In [6]:
# Load the annotations. The context manager guarantees the file handle is
# closed once the JSON has been parsed.
with open(ANNOTATIONS_PATH, encoding='utf-8') as annotations_file:
  annotations: dict[str, dict[str, list[int]]] = json.load(annotations_file)

# Changepoints of the first annotator listed for the selected dataset.
changepoints_annotations = next(iter(annotations[DATASET_NAME].values()))

# The string below is evaluated and discarded (it has no runtime effect); it
# is kept as a record of a command line -- presumably the one that produced
# the hard-coded `changepoints_bocpdms` that follow.
f"""
python cpdbench_bocpdms.py \
  --input /Users/abhisheksharma-mbpro/Documents/cpd-moht/TCPD/datasets/{DATASET_NAME}/{DATASET_NAME}.json \
  --intensity 100.0 \
  --prior-a 1 \
  --prior-b 1 \
  --threshold 100 \
  --use-timeout
"""  # noqa: E501
changepoints_bocpdms = [4, 6, 95, 188, 464, 506]


In [None]:
# Score the BOCPDMS changepoints against every annotator's changepoints for
# the selected dataset, using the project's `f_measure` helper.
f_measure_score = cpd_metrics.f_measure(
  annotations[DATASET_NAME], changepoints_bocpdms
)
print('Average F-measure against Human Annotations: ', f_measure_score)

In [8]:
def visualize_dataset(mat, ax=None):
  """Plot the time series stored in `mat` against its time-step index.

  Args:
    mat: Array exposing `shape` and `flatten()`. The flattened values are
      plotted against `np.arange(mat.shape[0])`, so `mat` must hold a single
      series (e.g. shape `(n,)` or `(n, 1)`).
    ax: Axes to draw on. When None, a new 10x4 figure with one axes is
      created.

  Returns:
    The axes that were drawn on, so that a caller who did not pass `ax` can
    still add to or customise the plot.
  """
  if ax is None:
    _, ax = plt.subplots(figsize=(10, 4))

  time_steps = np.arange(mat.shape[0])
  ax.plot(
    time_steps,
    mat.flatten(),
    marker='o',
    linestyle='-',
    markersize=3,
    c='b',
    label='Time Series',
  )

  ax.set_xlabel('Time Steps')
  ax.set_ylabel('Value')
  ax.grid(True)
  return ax


def visualize_changepoints(changepoints, ax=None, color='red'):
  """Mark each changepoint with a dashed vertical line.

  Args:
    changepoints: Iterable of x positions at which to draw a line.
    ax: Axes to draw on. When None, the current pyplot axes (`plt.gca()`) are
      used instead of failing on the missing axes.
    color: Colour used for every line.

  Returns:
    The axes that were drawn on.
  """
  if ax is None:
    ax = plt.gca()

  for cp in changepoints:
    # Draw a vertical dashed line at each changepoint
    ax.axvline(
      x=cp,
      color=color,
      linestyle='--',
      alpha=0.7,
    )
  return ax


# Visualize the dataset

In [None]:
# Visualize the dataset together with the BOCPDMS changepoints and with the
# human annotations.
n_dims = mat.shape[1]

# If the dataset has only one dimension, we don't need to plot each dimension
# separately: one panel for BOCPDMS, one panel for the annotations.
if n_dims == 1:
  fig, ax = plt.subplots(2, 1, figsize=(10, 4))
  visualize_dataset(mat, ax=ax[0])
  visualize_changepoints(changepoints_bocpdms, ax=ax[0])
  # Draw the series once, then overlay every annotator's changepoints. The
  # loop variable is deliberately NOT named `changepoints_annotations`, so the
  # variable of that name defined earlier is not overwritten.
  visualize_dataset(mat, ax=ax[1])
  for annotator_changepoints in annotations[DATASET_NAME].values():
    visualize_changepoints(annotator_changepoints, ax=ax[1])
  ax[0].set_title(f'{DATASET_NAME} BOCPDMS')
  ax[1].set_title(f'{DATASET_NAME} Human Annotations')
  fig.tight_layout()

# If the dataset has multiple dimensions, we plot each dimension separately:
# one figure for BOCPDMS and one figure for the annotations.
elif n_dims > 1:
  fig, ax = plt.subplots(n_dims, 1, figsize=(10, 6), sharex=True)
  for i in range(n_dims):
    visualize_dataset(mat[:, i : i + 1], ax=ax[i])
    visualize_changepoints(changepoints_bocpdms, ax=ax[i])
  ax[0].set_title(f'{DATASET_NAME} BOCPDMS')

  fig.tight_layout()

  fig, ax = plt.subplots(n_dims, 1, figsize=(10, 5), sharex=True)
  # `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
  # `plt.get_cmap` is the supported spelling.
  cmap = plt.get_cmap('tab10')
  for i in range(n_dims):
    visualize_dataset(mat[:, i : i + 1], ax=ax[i])
    # One colour per annotator, so their changepoints can be told apart.
    for j, annotator_changepoints in enumerate(
      annotations[DATASET_NAME].values()
    ):
      visualize_changepoints(annotator_changepoints, ax=ax[i], color=cmap(j))
  ax[0].set_title(f'{DATASET_NAME} Human Annotations')
  fig.tight_layout()
