In [1]:
from aeon.classification.distance_based import ProximityForest
from aeon.datasets import load_arrow_head
# Load the ArrowHead collection as 3D numpy arrays.
# NOTE(review): the split names look swapped relative to the variable names --
# the data used for fitting (X, y) is the "TEST" split, and the data used for
# scoring (X_test, y_test) is the "TRAIN" split. Confirm this is intentional.
X, y = load_arrow_head(split="TEST", return_type="numpy3d")
X_test, y_test = load_arrow_head(split="TRAIN", return_type="numpy3d")

In [2]:
import time

In [3]:
# Build a 100-tree Proximity Forest (n_jobs=-1) and time a single fit on (X, y).
clf = ProximityForest(n_jobs=-1, n_trees=100)

# perf_counter() readings taken immediately before and after the fit.
start = time.perf_counter()
clf.fit(X, y)
end = time.perf_counter()
print(f"Elapsed = {end - start} s")

Elapsed = 1141.102961299941 s


In [4]:
from aeon.classification.distance_based import ElasticEnsemble
# Elastic Ensemble restricted to the "dtw" and "lcss" distance measures,
# timed on the same (X, y) data as the forest for comparison.
ee_clf = ElasticEnsemble(distance_measures=["dtw", "lcss"])

# perf_counter() readings taken immediately before and after the fit.
start = time.perf_counter()
ee_clf.fit(X, y)
end = time.perf_counter()
print(f"Elapsed = {end - start} s")

Elapsed = 9128.582767199958 s


In [5]:
from sklearn.metrics import accuracy_score

In [6]:
# Predict with the fitted forest and score against the reference labels.
# scikit-learn's signature is accuracy_score(y_true, y_pred); passing the
# arguments by keyword keeps the roles unambiguous.
y_clf = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_clf)

0.9444444444444444

In [8]:
# Predict with the fitted Elastic Ensemble and score against the reference
# labels. scikit-learn's signature is accuracy_score(y_true, y_pred); passing
# the arguments by keyword keeps the roles unambiguous.
y_ee = ee_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_ee)

0.8888888888888888

In [12]:
# Only the forest class defined in this cell is listed as public.
# NOTE: that class definition rebinds the name ``ProximityForest`` that was
# imported from aeon in the first cell, so later cells use the local class.
__all__ = ["ProximityForest"]

from typing import Type, Union
import numpy as np
from joblib import Parallel, delayed
from aeon.classification.base import BaseClassifier
from aeon.classification.distance_based import ProximityTree

class ProximityForest(BaseClassifier):
    """Proximity Forest classifier.

    An ensemble of ``ProximityTree`` classifiers. Every tree is fitted on the
    full training collection with its own random seed, and the forest predicts
    by averaging the class probabilities returned by its trees.

    Parameters
    ----------
    n_trees : int, default=10
        Number of trees fitted by the forest.
    n_splitters : int, default=5
        Forwarded unchanged to every ``ProximityTree``.
    max_depth : int or None, default=None
        Forwarded unchanged to every ``ProximityTree``.
    min_samples_split : int, default=2
        Forwarded unchanged to every ``ProximityTree``.
    random_state : int, RandomState instance or None, default=None
        Source of the per-tree seeds. One integer seed per tree is drawn from
        it, so an integer value gives reproducible but distinct trees.
    n_jobs : int, default=1
        Requested number of jobs. The resolved value ``self._n_jobs`` (set by
        the base class before ``_fit`` runs) is given to joblib and to each
        tree.

    Attributes
    ----------
    classes_ : list
        Sorted unique labels seen in ``y`` during fit.
    trees_ : list
        The fitted ``ProximityTree`` instances, one per requested tree.
    """

    _tags = {
        # The fit/predict code below rejects input with more than one channel
        # and relies on ``X.ndim``/``X.shape``, so only equal-length univariate
        # numpy collections are advertised.
        "capability:multivariate": False,
        "capability:unequal_length": False,
        "capability:multithreading": True,
        "algorithm_type": "distance",
        "X_inner_type": "numpy3D",
    }

    def __init__(
        self,
        n_trees: int = 10,
        n_splitters: int = 5,
        max_depth: Union[int, None] = None,
        min_samples_split: int = 2,
        random_state: Union[int, Type[np.random.RandomState], None] = None,
        n_jobs: int = 1,
    ):
        self.n_trees = n_trees
        self.n_splitters = n_splitters
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.random_state = random_state
        self.n_jobs = n_jobs
        super().__init__()

    @staticmethod
    def _squeeze_channel(X):
        """Return ``X`` without its singleton channel axis.

        A 3D array must have exactly one channel on axis 1, which is removed.
        Arrays of any other rank are returned unchanged.

        Raises
        ------
        ValueError
            If ``X`` is 3D and has more than one channel.
        """
        if X.ndim == 3:
            if X.shape[1] != 1:
                raise ValueError("X should be univariate.")
            X = np.squeeze(X, axis=1)
        return X

    def _fit_tree(self, X, y, random_state=None):
        """Fit and return one ``ProximityTree`` on ``(X, y)``.

        ``random_state`` is the seed for this particular tree; when it is
        ``None`` the forest-level ``self.random_state`` is used instead.
        """
        tree_seed = self.random_state if random_state is None else random_state
        clf = ProximityTree(
            n_splitters=self.n_splitters,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            random_state=tree_seed,
            # NOTE(review): each tree also receives the forest's job count
            # while the trees themselves are fitted in parallel -- confirm
            # that this nested parallelism is intended.
            n_jobs=self._n_jobs,
        )
        clf.fit(X, y)
        return clf

    def _fit(self, X, y):
        """Fit ``n_trees`` proximity trees on the training collection."""
        X = self._squeeze_channel(X)

        # Draw a distinct integer seed for every tree. Handing the same
        # integer ``random_state`` to all trees would seed them identically.
        if isinstance(self.random_state, np.random.RandomState):
            rng = self.random_state
        else:
            rng = np.random.RandomState(self.random_state)
        seeds = rng.randint(np.iinfo(np.int32).max, size=self.n_trees)

        self.classes_ = list(np.unique(y))
        self.trees_ = Parallel(n_jobs=self._n_jobs)(
            delayed(self._fit_tree)(X, y, int(seed)) for seed in seeds
        )

        self._is_fitted = True

    def _predict_proba_tree(self, tree, X):
        """Return the class probabilities predicted by a single fitted tree."""
        return tree.predict_proba(X)

    def _predict_proba(self, X):
        """Average the per-tree class probabilities for every case in ``X``."""
        X = self._squeeze_channel(X)

        output_probas = Parallel(n_jobs=self._n_jobs)(
            delayed(self._predict_proba_tree)(tree, X) for tree in self.trees_
        )

        # Element-wise sum over trees, then divide by the number of trees.
        output_probas = np.sum(output_probas, axis=0)
        output_probas = np.divide(output_probas, self.n_trees)
        return output_probas

    def _predict(self, X):
        """Return, for every case, the label with the highest mean probability."""
        probas = self._predict_proba(X)
        best = np.argmax(probas, axis=1)
        # Column positions of the probability matrix index into ``classes_``.
        return np.asarray([self.classes_[i] for i in best])


In [13]:
# Build a 100-tree forest with the remaining constructor defaults and time a
# single fit on (X, y).
clf2 = ProximityForest(n_trees=100)

# perf_counter() readings taken immediately before and after the fit.
start = time.perf_counter()
clf2.fit(X, y)
end = time.perf_counter()
print(f"Elapsed = {end - start} s")

Elapsed = 533.3314680999611 s


In [14]:
# Predict with the second forest and score against the reference labels.
# scikit-learn's signature is accuracy_score(y_true, y_pred); passing the
# arguments by keyword keeps the roles unambiguous.
y_2 = clf2.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_2)

0.9444444444444444