# In [None]:
# -*- coding: utf-8 -*-
"""k-Nearest Neighbors Detector (kNN)"""

# Importing necessary libraries
from warnings import warn
import numpy as np
from sklearn.neighbors import BallTree, NearestNeighbors
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from .base import BaseDetector

# kNN class inherits from BaseDetector, used for outlier detection based on the distance to nearest neighbors
class KNN(BaseDetector):
    """kNN-based outlier detector.

    Scores each sample by its distances to its ``n_neighbors`` nearest
    training samples; the distances are reduced to a single score using
    ``method`` ('largest', 'mean' or 'median'). Larger scores mean the
    sample lies farther from its neighbors.
    """

    def __init__(self, contamination=0.1, n_neighbors=5, method='largest',
                 radius=1.0, algorithm='auto', leaf_size=30,
                 metric='minkowski', p=2, metric_params=None, n_jobs=1,
                 **kwargs):
        """
        Initialize KNN detector with various parameters.

        Parameters:
        - contamination: Proportion of outliers in the dataset; forwarded
          to BaseDetector.
        - n_neighbors: Number of neighbors to use.
        - method: How neighbor distances become a score: 'largest'
          (distance to the k-th neighbor), 'mean' or 'median'.
        - radius: Forwarded to NearestNeighbors.
        - algorithm: Nearest neighbor search algorithm. Values other than
          'auto' and 'ball_tree' trigger a FutureWarning.
        - leaf_size: Leaf size for NearestNeighbors and the fallback
          BallTree.
        - metric: Distance metric for neighbor calculation.
        - p: Power parameter used with the 'minkowski' metric.
        - metric_params: Optional dict of extra keyword arguments for the
          metric.
        - n_jobs: Forwarded to NearestNeighbors.
        - **kwargs: Extra keyword arguments forwarded to NearestNeighbors.
        """
        super(KNN, self).__init__(contamination=contamination)

        # Setting class attributes
        self.n_neighbors = n_neighbors
        self.method = method
        self.radius = radius
        self.algorithm = algorithm
        self.leaf_size = leaf_size
        self.metric = metric
        self.p = p
        self.metric_params = metric_params
        self.n_jobs = n_jobs

        # Warns user if algorithm is deprecated
        if self.algorithm not in ('auto', 'ball_tree'):
            warn('algorithm parameter is deprecated and will be removed in version 0.7.6. By default, ball_tree will be used.',
                 FutureWarning)

        # Initializing NearestNeighbors model
        self.neigh_ = NearestNeighbors(n_neighbors=self.n_neighbors,
                                       radius=self.radius,
                                       algorithm=self.algorithm,
                                       leaf_size=self.leaf_size,
                                       metric=self.metric,
                                       p=self.p,
                                       metric_params=self.metric_params,
                                       n_jobs=self.n_jobs,
                                       **kwargs)

    def fit(self, X, y=None):
        """Fit the kNN detector on the dataset X.

        Parameters:
        - X: Input samples, numpy array.
        - y: Not used for fitting; only passed to ``_set_n_classes``.

        Returns:
        - self: Fitted kNN detector object.

        Raises:
        - ValueError: If ``method`` is not 'largest', 'mean' or 'median'.
        """
        # Validates input and fits the model on X
        X = check_array(X)
        self._set_n_classes(y)
        self.neigh_.fit(X)

        # Reuse the tree built by NearestNeighbors when there is one; it is
        # None when the brute-force search was selected.
        fitted_tree = getattr(self.neigh_, '_tree', None)
        if fitted_tree is not None:
            self.tree_ = fitted_tree
        else:
            # Build a BallTree with the same metric configuration that
            # NearestNeighbors used, so that decision_function scores are
            # on the same scale as decision_scores_. For 'minkowski' this
            # means forwarding p (a 'p' given in metric_params wins).
            tree_kwargs = dict(self.metric_params or {})
            if self.metric == 'minkowski':
                tree_kwargs.setdefault('p', self.p)
            self.tree_ = BallTree(X, leaf_size=self.leaf_size,
                                  metric=self.metric,
                                  **tree_kwargs)

        # Calling kneighbors without X queries the training points
        # themselves; scikit-learn then does not count a point as its own
        # neighbor.
        dist_arr, _ = self.neigh_.kneighbors(n_neighbors=self.n_neighbors,
                                             return_distance=True)
        dist = self._get_dist_by_method(dist_arr)

        # Storing outlier scores, then letting the base class post-process
        # them (presumably deriving threshold_ and labels_, which
        # decision_function requires -- confirm in BaseDetector).
        self.decision_scores_ = dist.ravel()
        self._process_decision_scores()

        return self

    def decision_function(self, X):
        """Calculate anomaly scores for input samples X.

        Parameters:
        - X: Input samples, numpy array.

        Returns:
        - anomaly_scores: 1-D array with one anomaly score per sample in X.

        Raises:
        - ValueError: If ``method`` is not 'largest', 'mean' or 'median'.
        """
        # Checks if model has been fitted before calling this method
        check_is_fitted(self, ['tree_', 'decision_scores_', 'threshold_', 'labels_'])
        X = check_array(X)

        # Query all samples in a single call instead of one tree query per
        # row; the result is an (n_samples, n_neighbors) distance array.
        dist_arr, _ = self.tree_.query(X, k=self.n_neighbors)
        return self._get_dist_by_method(dist_arr).ravel()

    def _get_dist_by_method(self, dist_arr):
        """Determine outlier score based on distance calculation method.

        Parameters:
        - dist_arr: 2-D distance array, one row per sample and one column
          per neighbor.

        Returns:
        - dist: 1-D array of outlier scores, one per row of dist_arr.

        Raises:
        - ValueError: If ``self.method`` is not 'largest', 'mean' or
          'median'.
        """
        if self.method == 'largest':
            # Last column; the farthest neighbor when distances are sorted
            # in ascending order, as the kNN queries return them.
            return dist_arr[:, -1]
        if self.method == 'mean':
            return np.mean(dist_arr, axis=1)
        if self.method == 'median':
            return np.median(dist_arr, axis=1)
        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated AttributeError.
        raise ValueError(
            "method must be one of 'largest', 'mean' or 'median'; "
            "got {!r}".format(self.method))
